@yinyoudexing/xml2word 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/createDocxZip-BWHSZ7VQ.js +500 -0
- package/dist/createDocxZip-BWHSZ7VQ.js.map +1 -0
- package/dist/htmlToWordBodyXml-LY6DZSTW.js +877 -0
- package/dist/htmlToWordBodyXml-LY6DZSTW.js.map +1 -0
- package/dist/index.cjs +947 -72
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +30 -7
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/createDocxZip-WVDRDYZT.js +0 -109
- package/dist/createDocxZip-WVDRDYZT.js.map +0 -1
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js +0 -416
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js.map +0 -1
|
@@ -0,0 +1,877 @@
|
|
|
1
|
+
// src/lib/htmlToWordBodyXml.ts
|
|
2
|
+
import { parseDocument } from "htmlparser2";
|
|
3
|
+
/**
 * Escapes the five XML special characters so `value` can be embedded in
 * element text or in a quoted attribute value.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeXmlText(value) {
  // `&` is replaced first; otherwise the ampersands introduced by the other
  // entities would themselves be escaped a second time.
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
|
|
6
|
+
/**
 * Reports whether `text` has whitespace that needs `xml:space="preserve"`:
 * leading whitespace, trailing whitespace, or a run of two or more
 * whitespace characters.
 *
 * @param {string} text - Run text.
 * @returns {boolean} `true` when such whitespace is present; `false` for empty input.
 */
function shouldPreserveSpace(text) {
  if (!text) {
    return false;
  }
  return /^\s|\s$|\s{2,}/.test(text);
}
|
|
10
|
+
/**
 * Decides whether a run that trims to nothing should still be emitted.
 * Text containing a CR or LF is never kept. Otherwise the run is kept when
 * it contains a non-breaking space or at least two consecutive whitespace
 * characters.
 *
 * @param {string} text - Candidate run text.
 * @returns {boolean} `true` when the run should be kept.
 */
function shouldKeepWhitespaceOnlyRun(text) {
  if (!text || /[\r\n]/.test(text)) {
    return false;
  }
  return text.includes("\xA0") || /\s{2,}/.test(text);
}
|
|
16
|
+
/**
 * Parses an inline `style` attribute into a property → value map.
 *
 * Declarations are split on `;`. Property names are trimmed and lower-cased,
 * values are trimmed. Declarations without a `:` (or starting with one), or
 * with an empty name or value, are dropped. A later duplicate overrides an
 * earlier one.
 *
 * @param {string | undefined} style - Raw attribute value.
 * @returns {Record<string, string>} Parsed declarations (empty when `style` is falsy).
 */
function parseStyleAttribute(style) {
  if (!style) return {};
  const pairs = style
    .replace(/\r/g, "\n")
    .split(";")
    .flatMap((declaration) => {
      const colonAt = declaration.indexOf(":");
      if (colonAt <= 0) return [];
      const name = declaration.slice(0, colonAt).trim().toLowerCase();
      const value = declaration.slice(colonAt + 1).trim();
      return name && value ? [[name, value]] : [];
    });
  return Object.fromEntries(pairs);
}
|
|
31
|
+
/**
 * Converts a CSS `rgb(r, g, b)` value with integer channels into an
 * upper-case six-digit hex string without the leading `#`.
 *
 * @param {string} value - CSS colour value.
 * @returns {string | undefined} Hex string such as `"FF0080"`, or `undefined`
 *   when the value is not an `rgb()` triple or a channel exceeds 255.
 */
function parseRgbToHex(value) {
  const match = /^rgb\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*\)$/.exec(
    value.trim().toLowerCase()
  );
  if (match === null) return void 0;
  let hex = "";
  for (const component of match.slice(1, 4)) {
    const channel = Number(component);
    if (Number.isNaN(channel) || channel < 0 || channel > 255) return void 0;
    hex += channel.toString(16).padStart(2, "0");
  }
  return hex.toUpperCase();
}
|
|
38
|
+
/**
 * Converts a CSS colour to an upper-case six-digit hex string (no `#`).
 *
 * Supported inputs are `#RRGGBB`, the `#RGB` shorthand (each digit is
 * doubled, as CSS defines it) and `rgb(r, g, b)`.
 *
 * @param {string | undefined} value - CSS colour value.
 * @returns {string | undefined} Hex string, or `undefined` for empty or
 *   unsupported input.
 */
function parseCssColorToHex(value) {
  if (!value) return void 0;
  const v = value.trim();
  const longHex = v.match(/^#([0-9a-fA-F]{6})$/)?.[1];
  if (longHex) return longHex.toUpperCase();
  const shortHex = v.match(/^#([0-9a-fA-F]{3})$/)?.[1];
  if (shortHex) {
    // "#abc" is shorthand for "#aabbcc".
    return [...shortHex].map((digit) => digit + digit).join("").toUpperCase();
  }
  return parseRgbToHex(v);
}
|
|
45
|
+
/**
 * Converts a CSS `font-size` given in `pt` or `px` to half-points.
 * Pixels are converted at 96 px = 72 pt. The result is rounded and is never
 * below 1.
 *
 * @param {string | undefined} value - CSS font-size value.
 * @returns {number | undefined} Size in half-points, or `undefined` for
 *   empty input or any other unit.
 */
function parseFontSizeToHalfPoints(value) {
  if (!value) return void 0;
  const match = /^(\d+(?:\.\d+)?)(pt|px)$/.exec(value.trim().toLowerCase());
  if (!match) return void 0;
  const amount = Number(match[1]);
  const points = match[2] === "pt" ? amount : amount * 72 / 96;
  return Math.max(1, Math.round(points * 2));
}
|
|
57
|
+
/**
 * Picks the first family from a CSS `font-family` list and strips one
 * leading and one trailing quote character from it.
 *
 * @param {string | undefined} value - CSS font-family value.
 * @returns {string | undefined} The first family name, or `undefined` when
 *   the value is empty or the first entry is blank.
 */
function normalizeFontFamily(value) {
  if (!value) return void 0;
  const [primary] = value.split(",");
  const trimmed = primary?.trim();
  if (!trimmed) return void 0;
  return trimmed.replace(/^["']|["']$/g, "");
}
|
|
63
|
+
/**
 * Overlays `patch` on `base`, property by property. A patch value wins
 * unless it is `null` or `undefined`, so an explicit `false` does override.
 *
 * @param {object} base - Inherited text style.
 * @param {object} patch - Style contributed by the current element.
 * @returns {object} A new style object with all six text-style properties.
 */
function mergeTextStyle(base, patch) {
  const merged = {};
  for (const key of [
    "bold",
    "italic",
    "underline",
    "colorHex",
    "fontFamily",
    "fontSizeHalfPoints"
  ]) {
    merged[key] = patch[key] ?? base[key];
  }
  return merged;
}
|
|
73
|
+
/**
 * Derives the text-style patch contributed by one element, from its tag
 * name and its inline `style` attribute.
 *
 * - `<b>`/`<strong>`, `<i>`/`<em>` and `<u>` force their flag to `true`;
 *   the tag takes precedence over the CSS.
 * - `font-weight`: `bold`/`bolder` → true, `normal` → false, numeric →
 *   `>= 600`. Any other keyword leaves the flag unset.
 * - `font-style`: `italic`/`oblique` → true, `normal` → false.
 * - `text-decoration`: true when it mentions `underline`, false when it is
 *   present without it.
 *
 * Unset properties are `undefined`, so the inherited value survives a merge.
 *
 * @param {object} node - Element node with optional `name` and `attribs`.
 * @returns {object} Style patch with the six text-style properties.
 */
function styleFromElement(node) {
  const tag = node.name?.toLowerCase();
  const css = parseStyleAttribute(node.attribs?.style);

  const fontWeight = css["font-weight"]?.trim().toLowerCase();
  let boldFromCss;
  if (fontWeight === "bold" || fontWeight === "bolder") {
    boldFromCss = true;
  } else if (fontWeight === "normal") {
    // Explicit reset: numeric 400 already cancels inherited bold, so the
    // keyword must do the same.
    boldFromCss = false;
  } else if (fontWeight) {
    const numeric = Number(fontWeight);
    if (!Number.isNaN(numeric)) boldFromCss = numeric >= 600;
  }

  const fontStyle = css["font-style"]?.trim().toLowerCase();
  let italicFromCss;
  if (fontStyle === "italic" || fontStyle === "oblique") {
    italicFromCss = true;
  } else if (fontStyle === "normal") {
    // Explicit reset of inherited italic.
    italicFromCss = false;
  }

  const textDecoration = css["text-decoration"]?.trim().toLowerCase();
  const underlineFromCss = textDecoration ? textDecoration.includes("underline") : void 0;

  return {
    bold: tag === "b" || tag === "strong" ? true : boldFromCss,
    italic: tag === "i" || tag === "em" ? true : italicFromCss,
    underline: tag === "u" ? true : underlineFromCss,
    colorHex: parseCssColorToHex(css.color),
    fontFamily: normalizeFontFamily(css["font-family"]),
    fontSizeHalfPoints: parseFontSizeToHalfPoints(css["font-size"])
  };
}
|
|
108
|
+
/**
 * Concatenates the text of every text node under `node`, in document order.
 *
 * @param {object} node - DOM node with `type`, and `data` or `children`.
 * @returns {string} The combined text (empty string when there is none).
 */
function getTextContent(node) {
  if (node.type === "text") {
    return node.data ?? "";
  }
  return (node.children ?? []).reduce(
    (text, child) => text + getTextContent(child),
    ""
  );
}
|
|
115
|
+
// Added to an image's 1-based index when its `rId…` relationship id is
// formed, so the first image becomes `rId8`.
// NOTE(review): presumably rId1–rId7 are reserved for the package's fixed
// relationships — confirm against the code that writes document.xml.rels.
var IMAGE_RELATIONSHIP_ID_OFFSET = 7;
|
|
116
|
+
/**
 * Decodes a base64 string into bytes. Uses `globalThis.Buffer` when it
 * exists and `globalThis.atob` otherwise.
 *
 * @param {string} base64 - Base64 payload.
 * @returns {Uint8Array} Decoded bytes.
 * @throws {Error} When neither `Buffer` nor `atob` is available.
 */
function decodeBase64ToUint8Array(base64) {
  const NodeBuffer = globalThis.Buffer;
  if (NodeBuffer) {
    return new Uint8Array(NodeBuffer.from(base64, "base64"));
  }
  const decode = globalThis.atob;
  if (!decode) {
    throw new Error("Base64 decode is not available in this environment.");
  }
  const binary = decode(base64);
  // atob yields one char per byte; copy the code units into a byte array.
  return Uint8Array.from({ length: binary.length }, (_, i) => binary.charCodeAt(i));
}
|
|
130
|
+
/**
 * Parses a base64 `data:` URL holding a PNG or JPEG image.
 *
 * @param {string} src - Candidate data URL.
 * @returns {{contentType: string, data: Uint8Array, extension: string} | undefined}
 *   The lower-cased MIME type, the decoded bytes and the file extension
 *   (`"png"` or `"jpeg"`), or `undefined` when `src` is not such a data URL.
 */
function parseImageDataUrl(src) {
  const match = /^data:(image\/png|image\/jpeg);base64,([\s\S]+)$/i.exec(src);
  if (match === null) return void 0;
  const [, mime, payload] = match;
  const contentType = mime.toLowerCase();
  // Whitespace inside the payload (e.g. line wrapping) is dropped before decoding.
  const data = decodeBase64ToUint8Array(payload.replace(/\s+/g, ""));
  return {
    contentType,
    data,
    extension: contentType === "image/png" ? "png" : "jpeg"
  };
}
|
|
139
|
+
/**
 * Parses a CSS length given in `px` into whole pixels (rounded, minimum 1).
 *
 * @param {string | undefined} value - CSS length.
 * @returns {number | undefined} Pixel count, or `undefined` for empty input
 *   or any other unit.
 */
function parseCssLengthToPx(value) {
  if (!value) return void 0;
  const match = /^(\d+(?:\.\d+)?)px$/.exec(value.trim().toLowerCase());
  return match ? Math.max(1, Math.round(Number(match[1]))) : void 0;
}
|
|
146
|
+
/**
 * Reads an unsigned 32-bit big-endian integer from `bytes` at `offset`.
 *
 * @param {ArrayLike<number>} bytes - Source bytes.
 * @param {number} offset - Index of the most significant byte.
 * @returns {number | undefined} The value, or `undefined` when the four
 *   bytes do not fit inside `bytes`.
 */
function readUInt32BE(bytes, offset) {
  if (offset < 0 || offset + 4 > bytes.length) return void 0;
  let word = 0;
  for (let i = 0; i < 4; i++) {
    // `>>> 0` keeps the accumulator unsigned after each shift.
    word = (word << 8 | (bytes[offset + i] ?? 0)) >>> 0;
  }
  return word;
}
|
|
150
|
+
/**
 * Reads the pixel dimensions of a PNG from its header: the 8-byte signature
 * is checked, then width and height are read as big-endian 32-bit integers
 * at byte offsets 16 and 20.
 *
 * @param {Uint8Array} data - Image bytes.
 * @returns {{widthPx: number, heightPx: number} | undefined} The size, or
 *   `undefined` when the data is shorter than 24 bytes, the signature does
 *   not match, or either dimension is zero.
 */
function parsePngDimensions(data) {
  const PNG_MAGIC = [137, 80, 78, 71, 13, 10, 26, 10];
  if (data.length < 24) return void 0;
  const magicMatches = PNG_MAGIC.every((byte, i) => (data[i] ?? 0) === byte);
  if (!magicMatches) return void 0;
  const widthPx = readUInt32BE(data, 16);
  const heightPx = readUInt32BE(data, 20);
  return widthPx && heightPx ? { widthPx, heightPx } : void 0;
}
|
|
161
|
+
/**
 * Reads the pixel dimensions of a JPEG by walking its marker segments until
 * a start-of-frame (SOF) segment is found.
 *
 * @param {Uint8Array} data - Image bytes.
 * @returns {{widthPx: number, heightPx: number} | undefined} The size, or
 *   `undefined` when the data does not start with the SOI marker, is
 *   truncated, reaches EOI/SOS before any SOF segment, or declares a zero
 *   dimension.
 */
function parseJpegDimensions(data) {
  const size = data.length;
  if (size < 4 || data[0] !== 255 || data[1] !== 216) return void 0;

  // SOF0–SOF15, excluding 0xC4 (DHT), 0xC8 (JPG) and 0xCC (DAC).
  const sofMarkers = new Set([192, 193, 194, 195, 197, 198, 199, 201, 202, 203, 205, 206, 207]);
  const EOI = 217;
  const SOS = 218;

  let pos = 2;
  while (pos + 4 <= size) {
    if (data[pos] !== 255) {
      // Not at a marker prefix: resynchronise one byte at a time.
      pos += 1;
      continue;
    }
    // Skip the 0xFF prefix together with any fill bytes.
    do {
      pos += 1;
    } while (pos < size && data[pos] === 255);
    if (pos >= size) return void 0;

    const marker = data[pos];
    pos += 1;
    // No frame header can follow the end of image or the start of scan data.
    if (marker === EOI || marker === SOS) return void 0;

    if (pos + 2 > size) return void 0;
    const segmentLength = data[pos] << 8 | data[pos + 1];
    if (segmentLength < 2 || pos + segmentLength > size) return void 0;

    if (sofMarkers.has(marker)) {
      // Layout after the marker: length(2) precision(1) height(2) width(2).
      if (pos + 7 > size) return void 0;
      const heightPx = data[pos + 3] << 8 | data[pos + 4];
      const widthPx = data[pos + 5] << 8 | data[pos + 6];
      return widthPx && heightPx ? { widthPx, heightPx } : void 0;
    }
    pos += segmentLength;
  }
  return void 0;
}
|
|
191
|
+
/**
 * Reads the intrinsic pixel size of an image, dispatching on its MIME type.
 *
 * @param {string} contentType - `"image/png"` or `"image/jpeg"`.
 * @param {Uint8Array} data - Image bytes.
 * @returns {{widthPx: number, heightPx: number} | undefined} The size, or
 *   `undefined` for other types or unreadable data.
 */
function parseIntrinsicImageSizePx(contentType, data) {
  switch (contentType) {
    case "image/png":
      return parsePngDimensions(data);
    case "image/jpeg":
      return parseJpegDimensions(data);
    default:
      return void 0;
  }
}
|
|
196
|
+
/**
 * Scales `size` down uniformly so that it fits inside `maxBox`. It never
 * scales up. Both input and output dimensions are rounded and are at least 1.
 *
 * @param {{widthPx: number, heightPx: number}} size - Requested size.
 * @param {{maxWidthPx: number, maxHeightPx: number}} maxBox - Bounding box.
 * @returns {{widthPx: number, heightPx: number}} The fitted size.
 */
function applyMaxBoxPx(size, maxBox) {
  const toWholePx = (px) => Math.max(1, Math.round(px));
  const width = toWholePx(size.widthPx);
  const height = toWholePx(size.heightPx);
  const factor = Math.min(1, maxBox.maxWidthPx / width, maxBox.maxHeightPx / height);
  return { widthPx: toWholePx(width * factor), heightPx: toWholePx(height * factor) };
}
|
|
202
|
+
/**
 * Works out the rendered pixel size of an `<img>`/`<canvas>` node.
 *
 * For each dimension the precedence is CSS `px` value, then the numeric
 * attribute, then the intrinsic size, then a 300×150 default. When only one
 * CSS dimension is given, the other follows the aspect ratio. The same
 * applies to a single attribute, but only when an intrinsic size is known.
 * The result is finally fitted into a 624×864 px box.
 *
 * @param {object} node - Element node with optional `attribs`.
 * @param {{widthPx: number, heightPx: number} | undefined} intrinsic -
 *   Intrinsic image size, when known.
 * @returns {{widthPx: number, heightPx: number}} The final size.
 */
function computeImageSizePx(node, intrinsic) {
  // Numeric attribute → whole pixels; missing, zero or non-numeric → undefined.
  const attrToPx = (raw) => {
    const parsed = raw ? Number(raw) : void 0;
    return Number.isFinite(parsed) && parsed ? Math.max(1, Math.round(parsed)) : void 0;
  };
  const attrs = node.attribs;
  const widthAttrPx = attrToPx(attrs?.width);
  const heightAttrPx = attrToPx(attrs?.height);

  const css = parseStyleAttribute(attrs?.style);
  const wCss = parseCssLengthToPx(css.width);
  const hCss = parseCssLengthToPx(css.height);

  const hasCssW = typeof wCss === "number";
  const hasCssH = typeof hCss === "number";
  const hasAttrW = typeof widthAttrPx === "number";
  const hasAttrH = typeof heightAttrPx === "number";

  // Height / width. Falls back to the attribute pair, then to 1:2.
  let ratio = 0.5;
  if (intrinsic && intrinsic.widthPx > 0 && intrinsic.heightPx > 0) {
    ratio = intrinsic.heightPx / intrinsic.widthPx;
  } else if (widthAttrPx && heightAttrPx) {
    ratio = heightAttrPx / widthAttrPx;
  }

  let widthPx;
  if (hasCssW) widthPx = wCss;
  else if (hasAttrW) widthPx = widthAttrPx;
  else widthPx = intrinsic?.widthPx ?? 300;

  let heightPx;
  if (hasCssH) heightPx = hCss;
  else if (hasAttrH) heightPx = heightAttrPx;
  else heightPx = intrinsic?.heightPx ?? 150;

  const heightFromWidth = () => ({ widthPx, heightPx: Math.max(1, Math.round(widthPx * ratio)) });
  const widthFromHeight = () => ({ widthPx: Math.max(1, Math.round(heightPx / ratio)), heightPx });

  let finalSize;
  if (hasCssW && !hasCssH) finalSize = heightFromWidth();
  else if (hasCssH && !hasCssW) finalSize = widthFromHeight();
  else if (hasAttrW && !hasAttrH && intrinsic) finalSize = heightFromWidth();
  else if (hasAttrH && !hasAttrW && intrinsic) finalSize = widthFromHeight();
  else finalSize = { widthPx, heightPx };

  return applyMaxBoxPx(finalSize, { maxWidthPx: 624, maxHeightPx: 864 });
}
|
|
216
|
+
/**
 * Registers one decoded image on `result.images` and appends the matching
 * image token to `out`. Shared by the `<img>` and `<canvas>` branches of
 * `collectInlineRuns`.
 *
 * @param {object} node - The source element (used for sizing).
 * @param {{contentType: string, data: Uint8Array, extension: string}} parsed -
 *   Decoded data URL.
 * @param {{widthPx: number, heightPx: number} | undefined} intrinsic -
 *   Intrinsic size, when known.
 * @param {object[]} out - Token list to append to.
 * @param {{images: object[]}} result - Conversion result collecting images.
 */
function registerInlineImage(node, parsed, intrinsic, out, result) {
  const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
  // Image ids are 1-based and follow insertion order.
  const id = result.images.length + 1;
  const relationshipId = `rId${id + IMAGE_RELATIONSHIP_ID_OFFSET}`;
  result.images.push({
    relationshipId,
    target: `media/image${id}.${parsed.extension}`,
    data: parsed.data,
    contentType: parsed.contentType,
    widthPx,
    heightPx
  });
  out.push({ kind: "image", image: { relationshipId, widthPx, heightPx } });
}

/**
 * Flattens `node` and its descendants into inline tokens appended to `out`:
 * `{kind: "text", text, style}`, `{kind: "br"}` and `{kind: "image", image}`.
 *
 * - Text nodes emit a text token carrying the inherited style (empty text is
 *   skipped).
 * - `<br>` emits a break token.
 * - `<img src>` and `<canvas data-image|data-src>` with a PNG/JPEG data URL
 *   are registered on `result.images` and emit an image token. Other images
 *   are dropped silently.
 * - Any other element merges its own style into the inherited one and
 *   recurses into its children. Non-tag, non-text nodes just recurse.
 *
 * @param {object} node - DOM node to flatten.
 * @param {object} inherited - Text style inherited from ancestors.
 * @param {object[]} out - Token list, mutated in place.
 * @param {{images: object[]}} result - Conversion result, mutated in place.
 * @returns {void}
 */
function collectInlineRuns(node, inherited, out, result) {
  if (node.type === "text") {
    const text = node.data ?? "";
    if (text) out.push({ kind: "text", text, style: inherited });
    return;
  }

  if (node.type !== "tag") {
    for (const child of node.children ?? []) collectInlineRuns(child, inherited, out, result);
    return;
  }

  const tag = node.name?.toLowerCase();
  if (tag === "br") {
    out.push({ kind: "br" });
    return;
  }

  if (tag === "img") {
    const src = node.attribs?.src;
    if (!src) return;
    const parsed = parseImageDataUrl(src);
    if (!parsed) return;
    const intrinsic = parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
    registerInlineImage(node, parsed, intrinsic, out, result);
    return;
  }

  if (tag === "canvas") {
    const dataUrl = node.attribs?.["data-image"] ?? node.attribs?.["data-src"];
    if (!dataUrl) return;
    const parsed = parseImageDataUrl(dataUrl);
    if (!parsed) return;
    // A canvas's width/height attributes, when both are usable, are taken as
    // the intrinsic size in preference to the size encoded in the image data.
    const bufferW = node.attribs?.width ? Number(node.attribs.width) : void 0;
    const bufferH = node.attribs?.height ? Number(node.attribs.height) : void 0;
    const intrinsic =
      Number.isFinite(bufferW) && bufferW && Number.isFinite(bufferH) && bufferH
        ? { widthPx: Math.max(1, Math.round(bufferW)), heightPx: Math.max(1, Math.round(bufferH)) }
        : parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
    registerInlineImage(node, parsed, intrinsic, out, result);
    return;
  }

  const next = mergeTextStyle(inherited, styleFromElement(node));
  for (const child of node.children ?? []) collectInlineRuns(child, next, out, result);
}
|
|
280
|
+
/**
 * Serialises one text run as `<w:r>` with optional `<w:rPr>` properties.
 *
 * @param {object} style - Text style (bold, italic, underline, colorHex,
 *   fontFamily, fontSizeHalfPoints).
 * @param {string} text - Run text. It is XML-escaped, and
 *   `xml:space="preserve"` is added when its whitespace needs preserving.
 * @returns {string} The run XML.
 */
function buildRunXml(style, text) {
  let props = "";
  if (style.bold) props += "<w:b/>";
  if (style.italic) props += "<w:i/>";
  if (style.underline) props += '<w:u w:val="single"/>';
  if (style.colorHex) props += `<w:color w:val="${style.colorHex}"/>`;
  if (style.fontFamily) {
    const face = escapeXmlText(style.fontFamily);
    props += `<w:rFonts w:ascii="${face}" w:hAnsi="${face}" w:eastAsia="${face}"/>`;
  }
  if (typeof style.fontSizeHalfPoints === "number") {
    const halfPoints = style.fontSizeHalfPoints;
    props += `<w:sz w:val="${halfPoints}"/><w:szCs w:val="${halfPoints}"/>`;
  }
  const rPrXml = props === "" ? "" : `<w:rPr>${props}</w:rPr>`;
  const body = escapeXmlText(text);
  const spaceAttr = shouldPreserveSpace(text) ? ' xml:space="preserve"' : "";
  return `<w:r>${rPrXml}<w:t${spaceAttr}>${body}</w:t></w:r>`;
}
|
|
299
|
+
/**
 * Converts pixels to English Metric Units at 9525 EMU per pixel.
 *
 * @param {number} px - Length in pixels.
 * @returns {number} Length in EMU, rounded, never below 1.
 */
function pxToEmu(px) {
  const EMU_PER_PX = 9525;
  const emu = Math.round(px * EMU_PER_PX);
  return Math.max(1, emu);
}
|
|
302
|
+
/**
 * Serialises an inline picture as a `<w:r>` containing a `<w:drawing>`.
 *
 * The drawing's `docPr` id is the numeric part of the relationship id, and
 * the picture is named `Picture <id>`.
 *
 * @param {{relationshipId: string, widthPx: number, heightPx: number}} image -
 *   Image token payload.
 * @returns {string} The run XML.
 */
function buildImageRunXml(image) {
  const cx = pxToEmu(image.widthPx);
  const cy = pxToEmu(image.heightPx);
  const docPrId = image.relationshipId.replace(/^rId/, "");
  const safeName = escapeXmlText(`Picture ${docPrId}`);
  return [
    '<w:r><w:drawing xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"',
    ' xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"',
    ' xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">',
    '<wp:inline distT="0" distB="0" distL="0" distR="0">',
    `<wp:extent cx="${cx}" cy="${cy}"/>`,
    `<wp:docPr id="${docPrId}" name="${safeName}"/>`,
    '<a:graphic><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">',
    "<pic:pic>",
    `<pic:nvPicPr><pic:cNvPr id="0" name="${safeName}"/><pic:cNvPicPr/></pic:nvPicPr>`,
    `<pic:blipFill><a:blip r:embed="${image.relationshipId}"/><a:stretch><a:fillRect/></a:stretch></pic:blipFill>`,
    `<pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="${cx}" cy="${cy}"/></a:xfrm>`,
    '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr>',
    "</pic:pic>",
    "</a:graphicData></a:graphic></wp:inline></w:drawing></w:r>"
  ].join("");
}
|
|
309
|
+
/**
 * Tests whether the element's `class` attribute contains `className` as a
 * whitespace-separated token.
 *
 * @param {object} node - Element node with optional `attribs`.
 * @param {string} className - Class token to look for.
 * @returns {boolean} `true` when the token is present.
 */
function hasClass(node, className) {
  const classAttr = node.attribs?.class;
  return classAttr
    ? classAttr.split(/\s+/).some((token) => token === className)
    : false;
}
|
|
314
|
+
/**
 * Identifies elements whose whole subtree should be left out of the output:
 * `<button>`, a `<canvas>` without a `data-image`/`data-src` attribute, an
 * `<img class="ProseMirror-separator">`, the element with id `pages`, and
 * anything carrying the `ProseMirror-widget` class.
 *
 * @param {object} node - DOM node.
 * @returns {boolean} `true` when the subtree should be skipped.
 */
function isSkippableSubtree(node) {
  if (node.type !== "tag") return false;
  const attrs = node.attribs;
  switch (node.name?.toLowerCase()) {
    case "button":
      return true;
    case "canvas":
      if (!(attrs?.["data-image"] ?? attrs?.["data-src"])) return true;
      break;
    case "img":
      if (hasClass(node, "ProseMirror-separator")) return true;
      break;
    default:
      break;
  }
  return attrs?.id === "pages" || hasClass(node, "ProseMirror-widget");
}
|
|
327
|
+
/**
 * Converts a CSS length to twips (1/20 pt).
 *
 * Supported forms: `pt`, `px` (96 px = 72 pt), `em` (relative to
 * `baseFontHalfPoints`) and a bare number, which is returned rounded as is.
 * Negative values are accepted.
 *
 * @param {string | undefined} value - CSS length.
 * @param {number} baseFontHalfPoints - Font size in half-points used for `em`.
 * @returns {number | undefined} Twips, or `undefined` for empty or
 *   unsupported input.
 */
function parseCssLengthToTwips(value, baseFontHalfPoints) {
  if (!value) return void 0;
  const match = /^(-?\d+(?:\.\d+)?)(pt|px|em)?$/.exec(value.trim().toLowerCase());
  if (!match) return void 0;
  const amount = Number(match[1]);
  switch (match[2]) {
    case "pt":
      return Math.round(amount * 20);
    case "px":
      return Math.round(amount * 72 * 20 / 96);
    case "em": {
      const basePt = baseFontHalfPoints / 2;
      return Math.round(amount * basePt * 20);
    }
    default:
      return Math.round(amount);
  }
}
|
|
344
|
+
/**
 * Finds the first inline `font-size` in document order (pre-order,
 * including `node` itself) and returns it in half-points.
 *
 * @param {object} node - Root of the subtree to search.
 * @returns {number | undefined} The first parseable font size, or
 *   `undefined` when no element declares one.
 */
function inferFirstFontSizeHalfPoints(node) {
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === "tag") {
      const { "font-size": rawSize } = parseStyleAttribute(current.attribs?.style);
      const halfPoints = parseFontSizeToHalfPoints(rawSize);
      if (typeof halfPoints === "number") return halfPoints;
    }
    // Push children last-to-first so the first child is popped next.
    for (const child of [...(current.children ?? [])].reverse()) {
      pending.push(child);
    }
  }
  return void 0;
}
|
|
360
|
+
/**
 * Builds the `<w:pPr>` element for a paragraph from the node's inline CSS.
 *
 * Children are emitted in the order the WordprocessingML schema defines for
 * `w:pPr` (pStyle → spacing → ind → jc), so the output validates against
 * the schema as well as opening in Word.
 *
 * - `text-align` center/right/justify → `<w:jc>`.
 * - `margin-left` + `padding-left` → `w:left`, unless `extraInd.leftTwips`
 *   is given; `extraInd.hangingTwips` → `w:hanging`; `text-indent` →
 *   `w:firstLine`. Negative values are clamped to 0.
 * - `margin-top` / `margin-bottom` → `w:before` / `w:after`; `line-height`
 *   (unitless multiplier or length) → `w:line` with `lineRule="exact"`.
 *
 * @param {object} node - Element node with optional `attribs.style`.
 * @param {number} baseFontHalfPoints - Font size used for `em` and unitless
 *   line heights.
 * @param {{leftTwips?: number, hangingTwips?: number} | undefined} extraInd -
 *   Indentation overrides (used for list items).
 * @param {string | undefined} pStyleId - Paragraph style id, if any.
 * @returns {string} The `<w:pPr>` XML, or `""` when nothing is set.
 */
function buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId) {
  const css = parseStyleAttribute(node.attribs?.style);
  const toTwips = (property) => parseCssLengthToTwips(css[property], baseFontHalfPoints);

  const pStyleXml = pStyleId ? `<w:pStyle w:val="${escapeXmlText(pStyleId)}"/>` : "";

  // --- spacing -----------------------------------------------------------
  const before = toTwips("margin-top");
  const after = toTwips("margin-bottom");
  const lineHeight = (() => {
    const lh = css["line-height"]?.trim().toLowerCase();
    if (!lh || lh === "normal") return void 0;
    const unitless = lh.match(/^(\d+(?:\.\d+)?)$/);
    if (unitless) {
      // A bare number is a multiplier of the font size, not a twips value.
      const multiplier = Number(unitless[1]);
      if (!Number.isFinite(multiplier) || multiplier <= 0) return void 0;
      const basePt = baseFontHalfPoints / 2;
      return Math.round(basePt * multiplier * 20);
    }
    const twips = parseCssLengthToTwips(lh, baseFontHalfPoints);
    if (typeof twips !== "number") return void 0;
    return Math.max(1, twips);
  })();
  const spacingAttrs = [];
  if (typeof before === "number") spacingAttrs.push(`w:before="${Math.max(0, before)}"`);
  if (typeof after === "number") spacingAttrs.push(`w:after="${Math.max(0, after)}"`);
  if (typeof lineHeight === "number") {
    spacingAttrs.push(`w:line="${lineHeight}"`, 'w:lineRule="exact"');
  }
  const spacingXml = spacingAttrs.length ? `<w:spacing ${spacingAttrs.join(" ")}/>` : "";

  // --- indentation -------------------------------------------------------
  const left = (() => {
    const sum = (toTwips("margin-left") ?? 0) + (toTwips("padding-left") ?? 0);
    if (!sum) return void 0;
    return Math.max(0, sum);
  })();
  const firstLine = (() => {
    const textIndent = toTwips("text-indent");
    if (typeof textIndent !== "number" || !textIndent) return void 0;
    return Math.max(0, textIndent);
  })();
  const indAttrs = [];
  const leftTwips = extraInd?.leftTwips ?? left;
  if (typeof leftTwips === "number") indAttrs.push(`w:left="${leftTwips}"`);
  const hangingTwips = extraInd?.hangingTwips;
  if (typeof hangingTwips === "number") indAttrs.push(`w:hanging="${hangingTwips}"`);
  if (typeof firstLine === "number") indAttrs.push(`w:firstLine="${firstLine}"`);
  const indXml = indAttrs.length ? `<w:ind ${indAttrs.join(" ")}/>` : "";

  // --- justification -----------------------------------------------------
  const align = css["text-align"]?.trim().toLowerCase();
  const jcVal =
    align === "center" ? "center" : align === "right" ? "right" : align === "justify" ? "both" : void 0;
  const jcXml = jcVal ? `<w:jc w:val="${jcVal}"/>` : "";

  const inner = pStyleXml + spacingXml + indXml + jcXml;
  return inner ? `<w:pPr>${inner}</w:pPr>` : "";
}
|
|
414
|
+
/**
 * Builds one `<w:p>` from the inline content of `node`.
 *
 * The base font size is `baseStyle.fontSizeHalfPoints`, else the first
 * `font-size` found under `node`, else 28 half-points. Whitespace-only text
 * is dropped unless `shouldKeepWhitespaceOnlyRun` accepts it.
 *
 * @param {object} node - Container element.
 * @param {object} baseStyle - Text style inherited by the runs.
 * @param {object | undefined} extraInd - Indentation overrides.
 * @param {string | undefined} pStyleId - Paragraph style id.
 * @param {{images: object[]} | undefined} result - Conversion result that
 *   collects images. A throwaway one is used when omitted.
 * @returns {string} The paragraph XML, or `""` when no run was produced.
 */
function buildParagraphXmlFromContainer(node, baseStyle, extraInd, pStyleId, result) {
  const baseFontHalfPoints =
    baseStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? 28;
  const pPrXml = buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId);

  const sink = result ?? { bodyXml: "", images: [] };
  const tokens = [];
  (node.children ?? []).forEach((child) => collectInlineRuns(child, baseStyle, tokens, sink));

  let runsXml = "";
  for (const token of tokens) {
    switch (token.kind) {
      case "br":
        runsXml += "<w:r><w:br/></w:r>";
        break;
      case "image":
        runsXml += buildImageRunXml(token.image);
        break;
      default: {
        const { text, style } = token;
        if (text && (text.trim() || shouldKeepWhitespaceOnlyRun(text))) {
          runsXml += buildRunXml(style, text);
        }
      }
    }
  }
  return runsXml ? `<w:p>${pPrXml}${runsXml}</w:p>` : "";
}
|
|
441
|
+
// A paragraph whose only run is a page-type break (`<w:br w:type="page"/>`).
var PAGE_BREAK_XML = '<w:p><w:r><w:br w:type="page"/></w:r></w:p>';
|
|
442
|
+
/**
 * Reports whether an element explicitly requests a page break.
 *
 * An element qualifies when it has the `page-break` class, carries
 * `data-page-break="true"`, or declares a forcing value on any of
 * `page-break-before`, `page-break-after`, `break-before`, `break-after`.
 * A value forces a break when it contains `always` or is `page`.
 *
 * Every property is checked independently, so a non-forcing legacy
 * `page-break-*` value no longer hides a forcing `break-*` value.
 *
 * @param {object} node - DOM node.
 * @returns {boolean} `true` when a page break is requested.
 */
function isExplicitPageBreak(node) {
  if (node.type !== "tag") return false;
  const css = parseStyleAttribute(node.attribs?.style);
  const cls = node.attribs?.class ?? "";
  const classList = cls ? cls.split(/\s+/) : [];
  // The class check applies to every tag, `<hr>` included.
  if (classList.includes("page-break")) return true;
  if (node.attribs?.["data-page-break"] === "true") return true;

  const forcesBreak = (raw) => {
    const value = raw?.toLowerCase();
    if (!value) return false;
    // `page` must be the leading keyword so `avoid-page` is not mistaken for it.
    return value.includes("always") || /^page(?:\s|$)/.test(value);
  };
  return ["page-break-after", "break-after", "page-break-before", "break-before"].some(
    (property) => forcesBreak(css[property])
  );
}
|
|
456
|
+
/**
 * Converts a `<ul>`/`<ol>` into numbered or bulleted `<w:p>` paragraphs,
 * one per `<li>` that has inline content, recursing into nested lists.
 *
 * Bullets use `numId` 1 and ordered lists use `numId` 2. The level is
 * clamped to 0–8. Each level indents by 720 twips with a 360-twip hanging
 * indent.
 *
 * @param {object} listNode - The list element.
 * @param {boolean} ordered - `true` for `<ol>`.
 * @param {number} level - Zero-based nesting level.
 * @param {{images: object[]}} result - Conversion result collecting images.
 * @returns {string[]} Paragraph XML strings in document order.
 */
function buildListBlocks(listNode, ordered, level, result) {
  const items = (listNode.children ?? []).filter(
    (child) => child.type === "tag" && child.name?.toLowerCase() === "li"
  );
  if (items.length === 0) return [];

  const numId = ordered ? 2 : 1;
  const ilvl = Math.max(0, Math.min(8, Math.floor(level)));
  const leftTwips = 720 * (ilvl + 1);
  const hangingTwips = 360;
  const numPrXml = `<w:numPr><w:ilvl w:val="${ilvl}"/><w:numId w:val="${numId}"/></w:numPr>`;

  const blocks = [];
  for (const li of items) {
    // Nested lists are set aside so they become their own paragraphs after
    // the item's inline content.
    const sublists = [];
    const tokens = [];
    const itemStyle = {};
    for (const child of li.children ?? []) {
      const childTag = child.type === "tag" ? child.name?.toLowerCase() : void 0;
      if (childTag === "ul" || childTag === "ol") {
        sublists.push(child);
      } else {
        collectInlineRuns(child, itemStyle, tokens, result);
      }
    }

    const runXml = [];
    for (const token of tokens) {
      if (token.kind === "br") {
        runXml.push("<w:r><w:br/></w:r>");
      } else if (token.kind === "image") {
        runXml.push(buildImageRunXml(token.image));
      } else {
        const { text, style } = token;
        if (text && (text.trim() || shouldKeepWhitespaceOnlyRun(text))) {
          runXml.push(buildRunXml(style, text));
        }
      }
    }

    if (runXml.length > 0) {
      const pPrXml = buildParagraphPrXml(
        li,
        inferFirstFontSizeHalfPoints(li) ?? 28,
        { leftTwips, hangingTwips },
        void 0
      );
      // The numbering properties go in as the first child of pPr.
      const mergedPPrXml = pPrXml
        ? pPrXml.replace("<w:pPr>", `<w:pPr>${numPrXml}`)
        : `<w:pPr>${numPrXml}<w:ind w:left="${leftTwips}" w:hanging="${hangingTwips}"/></w:pPr>`;
      blocks.push(`<w:p>${mergedPPrXml}${runXml.join("")}</w:p>`);
    }

    for (const sublist of sublists) {
      const sublistOrdered = sublist.name?.toLowerCase() === "ol";
      blocks.push(...buildListBlocks(sublist, sublistOrdered, ilvl + 1, result));
    }
  }
  return blocks;
}
|
|
514
|
+
/**
 * Reads a table cell's inline CSS `width` in twips. `em` values are resolved
 * against a 28 half-point base font.
 *
 * @param {object} node - Cell element with optional `attribs.style`.
 * @returns {number | undefined} A positive width in twips, or `undefined`
 *   when there is no usable width.
 */
function parseCellWidthTwips(node) {
  const { width: rawWidth } = parseStyleAttribute(node.attribs?.style);
  const twips = parseCssLengthToTwips(rawWidth, 28);
  if (typeof twips !== "number") return void 0;
  return twips <= 0 ? void 0 : twips;
}
|
|
520
|
+
/**
 * Roughly estimates the rendered width of `text` in twips.
 *
 * With the font size in points as the unit, each character counts as:
 * 20 twips per point for CJK (ideographs, CJK punctuation, full-width
 * forms), 11 for other characters, and 6 for a space or tab.
 *
 * @param {string} text - Text to measure.
 * @param {number} baseFontHalfPoints - Font size in half-points.
 * @returns {number} Estimated width in twips.
 */
function estimateTextWidthTwips(text, baseFontHalfPoints) {
  const basePt = baseFontHalfPoints / 2;
  const cjkRegex = /[\u3400-\u4dbf\u4e00-\u9fff\u3000-\u303f\uff00-\uffef]/;
  let cjk = 0;
  let latin = 0;
  let spaces = 0;
  for (const ch of text) {
    // NOTE(review): as extracted, the original compared `ch` against the same
    // space character twice; the second operand was most likely a tab lost in
    // rendering. Tabs are counted as spaces here — confirm against the
    // TypeScript source.
    if (ch === " " || ch === "\t") {
      spaces++;
    } else if (cjkRegex.test(ch)) {
      cjk++;
    } else {
      latin++;
    }
  }
  const cjkTwips = Math.round(basePt * 20);
  const latinTwips = Math.round(basePt * 11);
  const spaceTwips = Math.round(basePt * 6);
  return cjk * cjkTwips + latin * latinTwips + spaces * spaceTwips;
}
|
|
542
|
+
/**
 * Parses a CSS `border` shorthand into WordprocessingML border settings.
 *
 * - Width (`px`, `pt` or bare number) → `sz`: 4 when absent, 0 when
 *   non-positive, otherwise `max(2, round(twips * 0.4))`.
 * - Style keyword → `val`: `none`/`hidden` → `nil`; `dashed`, `dotted`,
 *   `double` map to themselves; anything else → `single`.
 * - Colour (`#hex` or `rgb(...)`) → `colorHex`.
 *
 * `rgb(r, g, b)` is tokenised as one unit, so spaces after the commas no
 * longer break colour detection.
 *
 * @param {string | undefined} value - CSS border shorthand.
 * @param {number} baseFontHalfPoints - Font size used for length conversion.
 * @returns {{val: string, sz: number, colorHex?: string} | undefined}
 *   Border settings, or `undefined` for empty input.
 */
function parseBorderShorthand(value, baseFontHalfPoints) {
  if (!value) return void 0;
  const raw = value.trim().toLowerCase();
  if (!raw) return void 0;
  if (raw === "none" || raw === "0") return { val: "nil", sz: 0 };

  // Keep a functional colour such as "rgb(0, 0, 0)" together; split the rest
  // on whitespace.
  const tokens = raw.match(/rgb\([^)]*\)|\S+/g) ?? [];
  if (!tokens.length) return void 0;

  const widthToken = tokens.find((t) => /^(?:\d+(?:\.\d+)?)(?:px|pt)?$/.test(t));
  const styleToken = tokens.find(
    (t) => ["none", "solid", "dashed", "dotted", "double", "hidden"].includes(t)
  );
  const colorToken = tokens.find((t) => t.startsWith("#") || t.startsWith("rgb("));

  const widthTwips = parseCssLengthToTwips(widthToken, baseFontHalfPoints);
  let sz;
  if (typeof widthTwips !== "number") sz = 4;
  else if (widthTwips <= 0) sz = 0;
  else sz = Math.max(2, Math.round(widthTwips * 0.4));

  let val = "single";
  if (styleToken === "none" || styleToken === "hidden") val = "nil";
  else if (styleToken === "dashed" || styleToken === "dotted" || styleToken === "double") val = styleToken;

  const colorHex = parseCssColorToHex(colorToken);
  return { val, sz, colorHex };
}
|
|
573
|
+
/**
 * Serialises one border edge as `<w:{tag} .../>`.
 *
 * @param {string} tag - Edge element name without the `w:` prefix.
 * @param {{val: string, sz: number, colorHex?: string} | undefined} border -
 *   Border settings. A single 4-unit line is used when omitted.
 * @param {string} fallbackColorHex - Colour used when the border has none.
 * @returns {string} The border element XML.
 */
function buildBorderTag(tag, border, fallbackColorHex) {
  const { val, sz, colorHex } = border ?? {
    val: "single",
    sz: 4,
    colorHex: fallbackColorHex
  };
  const color = (colorHex ?? fallbackColorHex).toUpperCase();
  const attrs = [`w:val="${val}"`, `w:sz="${sz}"`, 'w:space="0"', `w:color="${color}"`];
  return `<w:${tag} ${attrs.join(" ")}/>`;
}
|
|
578
|
+
/**
 * Adds the default table-cell spacing (no space before/after, 360 auto line
 * spacing, word wrap on) to a paragraph that declares no spacing of its own.
 *
 * Only strings containing a plain `<w:p>` opening tag are touched. If the
 * paragraph already has a `<w:pPr>`, the defaults go in as its first
 * children, unless a `<w:spacing ` element is already present. Otherwise a
 * new `<w:pPr>` is created.
 *
 * @param {string} pXml - Paragraph XML.
 * @returns {string} The paragraph XML, possibly with spacing injected.
 */
function injectTableCellParagraphSpacing(pXml) {
  const P_OPEN = "<w:p>";
  const PPR_OPEN = "<w:pPr>";
  if (!pXml.includes(P_OPEN)) return pXml;

  const spacingXml = '<w:spacing w:before="0" w:after="0" w:line="360" w:lineRule="auto"/><w:wordWrap w:val="1"/>';
  if (!pXml.includes(PPR_OPEN)) {
    return pXml.replace(P_OPEN, `${P_OPEN}${PPR_OPEN}${spacingXml}</w:pPr>`);
  }
  return pXml.includes("<w:spacing ") ? pXml : pXml.replace(PPR_OPEN, PPR_OPEN + spacingXml);
}
|
|
588
|
+
/**
 * Builds the block-level XML for one table cell.
 *
 * If the cell has no direct `<p>`, heading, list, `<img>` or `<canvas>`
 * child, its whole content becomes a single paragraph. Otherwise each such
 * direct child is converted in order, and other direct children are not
 * emitted. Every paragraph receives the table-cell spacing defaults. An
 * empty result yields `<w:p/>`.
 *
 * @param {object} cell - The `<td>`/`<th>` element.
 * @param {object} baseStyle - Text style inherited by the cell's runs.
 * @param {{images: object[]}} result - Conversion result collecting images.
 * @returns {string} Concatenated paragraph XML for the cell.
 */
function buildTableCellBlocksXml(cell, baseStyle, result) {
  const HEADING = /^h[1-6]$/;
  const kids = cell.children ?? [];
  const tagOf = (child) => (child.type === "tag" ? child.name?.toLowerCase() : void 0);
  const finish = (paragraphs) =>
    paragraphs.length
      ? paragraphs.map((p) => injectTableCellParagraphSpacing(p)).join("")
      : "<w:p/>";

  const hasBlocks = kids.some((child) => {
    if (child.type !== "tag") return false;
    const tag = tagOf(child);
    return ["p", "ul", "ol", "img", "canvas"].includes(tag) || HEADING.test(tag ?? "");
  });

  const paragraphs = [];
  const pushIfAny = (xml) => {
    if (xml) paragraphs.push(xml);
  };

  if (!hasBlocks) {
    pushIfAny(buildParagraphXmlFromContainer(cell, baseStyle, void 0, void 0, result));
    return finish(paragraphs);
  }

  for (const child of kids) {
    if (child.type !== "tag") continue;
    const tag = tagOf(child);
    if (tag === "p") {
      pushIfAny(buildParagraphXmlFromContainer(child, baseStyle, void 0, void 0, result));
    } else if (tag && HEADING.test(tag)) {
      const headingLevel = Number(tag.slice(1));
      pushIfAny(
        buildParagraphXmlFromContainer(child, baseStyle, void 0, `Heading${headingLevel}`, result)
      );
    } else if (tag === "ul" || tag === "ol") {
      paragraphs.push(...buildListBlocks(child, tag === "ol", 0, result));
    } else if (tag === "img" || tag === "canvas") {
      pushIfAny(buildParagraphXmlFromSingleInlineNode(child, baseStyle, result));
    }
  }
  return finish(paragraphs);
}
|
|
629
|
+
/**
 * Converts a parsed `<table>` node into a WordprocessingML `<w:tbl>`.
 *
 * Steps: collect every `<tr>` below the table, derive one width per column
 * (explicit cell width if any, otherwise an estimate from the cell text),
 * scale the widths down when they exceed the maximum table width, then emit
 * `tblPr`, `tblGrid` and one `<w:tr>` per row that has cells.
 *
 * Notes:
 * - Child elements of `w:tblPr` and `w:tcPr` are emitted in the order the
 *   WordprocessingML schema defines, and table alignment is written as
 *   `<w:jc>` (the element `tblPr` defines for it).
 * - A table without any `<td>`/`<th>` cell yields `""`, not a row-less `<w:tbl>`.
 * - The row walk also descends into cells, so rows of nested tables are
 *   collected as rows of this table.
 * - `colspan` / `rowspan` attributes are not read.
 *
 * @param {object} tableNode - Parsed `<table>` element.
 * @param {object} result - Conversion accumulator passed through to the cell builders.
 * @returns {string} `<w:tbl>...</w:tbl>` XML, or `""` when the table has no cells.
 */
function buildTableXml(tableNode, result) {
  // Pre-order walk; unshift keeps the rows in document order.
  const rows = [];
  const stack = [...tableNode.children ?? []];
  while (stack.length) {
    const n = stack.shift();
    if (n.type === "tag" && n.name?.toLowerCase() === "tr") rows.push(n);
    if (n.children?.length) stack.unshift(...n.children);
  }
  const rowCells = rows.map(
    (tr) => (tr.children ?? []).filter((c) => c.type === "tag" && (c.name === "td" || c.name === "th"))
  );
  const colCount = Math.max(0, ...rowCells.map((cells) => cells.length));
  if (colCount === 0) return "";

  const maxTableWidthTwips = 9360;
  const estimatedColWidths = Array.from({ length: colCount }, (_, i) => {
    let explicit;
    let estimated = 0;
    for (const cells of rowCells) {
      const cell = cells[i];
      if (!cell) continue;
      const w = parseCellWidthTwips(cell);
      // The first explicit width found in the column wins.
      if (typeof w === "number") explicit = explicit ?? w;
      const text = getTextContent(cell).replace(/\s+/g, " ").trim();
      if (!text) continue;
      const baseFontHalfPoints = inferFirstFontSizeHalfPoints(cell) ?? 28;
      const wTwips = estimateTextWidthTwips(text, baseFontHalfPoints) + 240;
      estimated = Math.max(estimated, wTwips);
    }
    // No explicit width and no text: give the column an even share.
    const base = typeof explicit === "number" ? explicit : estimated || Math.round(maxTableWidthTwips / colCount);
    return Math.max(720, Math.min(6e3, Math.round(base)));
  });

  const normalizedColWidths = (() => {
    const sum = estimatedColWidths.reduce((a, b) => a + b, 0);
    if (sum <= maxTableWidthTwips) return estimatedColWidths;
    const scaled = estimatedColWidths.map(
      (w) => Math.max(720, Math.floor(w * maxTableWidthTwips / sum))
    );
    // Put the rounding remainder on the last column.
    const scaledSum = scaled.reduce((a, b) => a + b, 0);
    const diff = maxTableWidthTwips - scaledSum;
    if (diff !== 0) scaled[scaled.length - 1] = Math.max(720, scaled[scaled.length - 1] + diff);
    return scaled;
  })();
  const tblGrid = `<w:tblGrid>${normalizedColWidths.map((w) => `<w:gridCol w:w="${w}"/>`).join("")}</w:tblGrid>`;

  const rowXml = [];
  for (const cells of rowCells) {
    const cellXml = [];
    for (let i = 0; i < cells.length; i++) {
      const cell = cells[i];
      const isHeader = cell.name === "th";
      const baseStyle = isHeader ? { bold: true } : {};
      const paragraphs = buildTableCellBlocksXml(cell, baseStyle, result);
      const css = parseStyleAttribute(cell.attribs?.style);
      const widthTwips = parseCellWidthTwips(cell) ?? normalizedColWidths[i];
      const tcW = typeof widthTwips === "number" ? `<w:tcW w:w="${widthTwips}" w:type="dxa"/>` : `<w:tcW w:w="0" w:type="auto"/>`;
      const vAlign = (() => {
        const v = css["vertical-align"]?.trim().toLowerCase();
        if (!v) return "";
        if (v === "middle" || v === "center") return '<w:vAlign w:val="center"/>';
        if (v === "bottom") return '<w:vAlign w:val="bottom"/>';
        if (v === "top") return '<w:vAlign w:val="top"/>';
        return "";
      })();
      const shd = (() => {
        const hex = parseCssColorToHex(css["background-color"]);
        if (!hex) return "";
        return `<w:shd w:val="clear" w:color="auto" w:fill="${hex}"/>`;
      })();
      const noWrap = (() => {
        const ws = css["white-space"]?.trim().toLowerCase();
        if (ws?.includes("nowrap")) return "<w:noWrap/>";
        return "";
      })();
      const cellBorder = (() => {
        const bAll = parseBorderShorthand(css.border, 28);
        // A side-specific declaration overrides the `border` shorthand.
        const bTop = parseBorderShorthand(css["border-top"] ?? css.border, 28);
        const bLeft = parseBorderShorthand(css["border-left"] ?? css.border, 28);
        const bBottom = parseBorderShorthand(css["border-bottom"] ?? css.border, 28);
        const bRight = parseBorderShorthand(css["border-right"] ?? css.border, 28);
        const any = bAll || css.border || css["border-top"] || css["border-left"] || css["border-bottom"] || css["border-right"];
        if (!any) return "";
        const fallback = bAll?.colorHex ?? "D9D9D9";
        return `<w:tcBorders>${buildBorderTag("top", bTop, fallback)}${buildBorderTag("left", bLeft, fallback)}${buildBorderTag("bottom", bBottom, fallback)}${buildBorderTag("right", bRight, fallback)}</w:tcBorders>`;
      })();
      // CT_TcPr order: tcW, tcBorders, shd, noWrap, vAlign.
      cellXml.push(`<w:tc><w:tcPr>${tcW}${cellBorder}${shd}${noWrap}${vAlign}</w:tcPr>${paragraphs}</w:tc>`);
    }
    if (cellXml.length) rowXml.push(`<w:tr>${cellXml.join("")}</w:tr>`);
  }

  const tblCss = parseStyleAttribute(tableNode.attribs?.style);
  const tblAlign = (() => {
    const ml = tblCss["margin-left"]?.trim().toLowerCase();
    const mr = tblCss["margin-right"]?.trim().toLowerCase();
    const m = tblCss.margin?.trim().toLowerCase();
    if (ml === "auto" && mr === "auto" || (m?.includes("auto") ?? false)) return '<w:jc w:val="center"/>';
    const ta = tblCss["text-align"]?.trim().toLowerCase();
    if (ta === "center") return '<w:jc w:val="center"/>';
    if (ta === "right") return '<w:jc w:val="right"/>';
    return "";
  })();
  const tblBorder = (() => {
    // Without a `border` declaration the table gets default single borders.
    const border = tblCss.border ? parseBorderShorthand(tblCss.border, 28) : void 0;
    const fallback = border?.colorHex ?? "D9D9D9";
    const sides = ["top", "left", "bottom", "right", "insideH", "insideV"];
    return `<w:tblBorders>${sides.map((side) => buildBorderTag(side, border, fallback)).join("")}</w:tblBorders>`;
  })();
  const tblW = `<w:tblW w:w="${normalizedColWidths.reduce((a, b) => a + b, 0)}" w:type="dxa"/>`;
  // CT_TblPr order: tblW, jc, tblBorders, tblLayout.
  const tblPr = `<w:tblPr>${tblW}${tblAlign}${tblBorder}<w:tblLayout w:type="fixed"/></w:tblPr>`;
  return `<w:tbl>${tblPr}${tblGrid}${rowXml.join("")}</w:tbl>`;
}
|
|
770
|
+
/**
 * Renders a single node (callers pass `<img>` / `<canvas>` elements) as its
 * own paragraph by wrapping it in a synthetic `<p style="text-align: center;">`
 * element and delegating to buildParagraphXmlFromContainer.
 *
 * @param {object} node - Node that becomes the only child of the synthetic paragraph.
 * @param {object} baseStyle - Run style passed through to the paragraph builder.
 * @param {object} result - Conversion accumulator passed through to the paragraph builder.
 * @returns {*} Whatever buildParagraphXmlFromContainer returns for the wrapper.
 */
function buildParagraphXmlFromSingleInlineNode(node, baseStyle, result) {
  const centeredParagraph = { type: "tag", name: "p", attribs: { style: "text-align: center;" }, children: [node] };
  return buildParagraphXmlFromContainer(centeredParagraph, baseStyle, undefined, undefined, result);
}
|
|
779
|
+
/**
 * Walks the parsed tree depth-first and appends the XML of every block-level
 * element it recognises to `out`.
 *
 * Recognised elements: explicit page breaks, p, img, canvas, h1-h6, table,
 * ul and ol. A recognised element is converted as a whole and not descended
 * into; any other node is only traversed for its children. Subtrees for which
 * isSkippableSubtree returns true are ignored entirely.
 *
 * @param {object} node - Current node of the parsed document.
 * @param {string[]} out - Receives the generated XML fragments, in document order.
 * @param {object} result - Conversion accumulator passed through to the builders.
 * @returns {void}
 */
function collectBodyBlocks(node, out, result) {
  if (isSkippableSubtree(node)) return;

  if (node.type === "tag") {
    const name = node.name?.toLowerCase();
    const appendIfPresent = (xml) => {
      if (xml) out.push(xml);
    };

    // The page-break test runs before any tag-name dispatch.
    if (isExplicitPageBreak(node)) {
      out.push(PAGE_BREAK_XML);
      return;
    }

    switch (name) {
      case "p":
        appendIfPresent(buildParagraphXmlFromContainer(node, {}, void 0, void 0, result));
        return;
      case "img":
      case "canvas":
        appendIfPresent(buildParagraphXmlFromSingleInlineNode(node, {}, result));
        return;
      case "table":
        appendIfPresent(buildTableXml(node, result));
        return;
      case "ul":
      case "ol":
        // List fragments are appended as returned, without filtering.
        out.push(...buildListBlocks(node, name === "ol", 0, result));
        return;
      default:
        if (name && /^h[1-6]$/.test(name)) {
          const headingLevel = Number(name.slice(1));
          appendIfPresent(buildParagraphXmlFromContainer(node, {}, void 0, `Heading${headingLevel}`, result));
          return;
        }
    }
  }

  (node.children ?? []).forEach((child) => collectBodyBlocks(child, out, result));
}
|
|
815
|
+
/**
 * Converts plain text into WordprocessingML body XML, one paragraph per line.
 *
 * CRLF and lone CR are treated as line breaks. An empty line becomes `<w:p/>`;
 * every other line becomes a `<w:p>` holding the run built by buildRunXml with
 * an empty style.
 *
 * @param {string} text - Plain text to convert.
 * @returns {string} Concatenated paragraph XML.
 * @throws {Error} "Text is empty." when the text is empty or whitespace only.
 */
function textToWordBodyXml(text) {
  const normalized = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  if (normalized.trim() === "") {
    throw new Error("Text is empty.");
  }
  return normalized
    .split("\n")
    .map((line) => (line === "" ? "<w:p/>" : `<w:p>${buildRunXml({}, line)}</w:p>`))
    .join("");
}
|
|
831
|
+
/**
 * Parses an HTML string and converts its recognised block elements into
 * WordprocessingML body XML.
 *
 * Line endings are normalised to "\n" before parsing. The same accumulator
 * object is handed to collectBodyBlocks and then returned.
 *
 * @param {string} html - HTML source.
 * @returns {{ bodyXml: string, images: Array }} Accumulator whose `bodyXml` is
 *   the concatenation of all collected blocks ("" when none were produced);
 *   `images` starts empty and is presumably filled by the builders — confirm.
 */
function htmlToWordBody(html) {
  const parserOptions = {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true
  };
  const unifiedNewlines = html.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  const documentRoot = parseDocument(unifiedNewlines, parserOptions);
  const conversion = { bodyXml: "", images: [] };
  const blocks = [];
  collectBodyBlocks(documentRoot, blocks, conversion);
  conversion.bodyXml = blocks.join("");
  return conversion;
}
|
|
844
|
+
/**
 * Converts HTML into WordprocessingML body XML and returns only the XML.
 *
 * Delegates to htmlToWordBodyWithAssets, which performs the same conversion
 * and the same plain-text fallback when no block element was converted, and
 * returns its `bodyXml`.
 *
 * @param {string} html - HTML source.
 * @returns {string} Body XML.
 * @throws {Error} "Text is empty." (from textToWordBodyXml) when no block was
 *   converted and the extracted text is blank.
 */
function htmlToWordBodyXml(html) {
  return htmlToWordBodyWithAssets(html).bodyXml;
}
|
|
858
|
+
/**
 * Converts HTML into WordprocessingML body XML together with the images
 * array of the conversion accumulator.
 *
 * When the block conversion yields no XML, the HTML is parsed again, reduced
 * to its text content, and that text is converted line by line instead; the
 * fallback result always has an empty `images` array.
 *
 * @param {string} html - HTML source.
 * @returns {{ bodyXml: string, images: Array }} Conversion result.
 * @throws {Error} "Text is empty." (from textToWordBodyXml) when the fallback
 *   is taken and the extracted text is blank.
 */
function htmlToWordBodyWithAssets(html) {
  const converted = htmlToWordBody(html);
  if (converted.bodyXml) return converted;

  const reparsed = parseDocument(html, {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true
  });
  const plainText = getTextContent(reparsed);
  return { bodyXml: textToWordBodyXml(plainText), images: [] };
}
|
|
872
|
+
export {
|
|
873
|
+
htmlToWordBodyWithAssets,
|
|
874
|
+
htmlToWordBodyXml,
|
|
875
|
+
textToWordBodyXml
|
|
876
|
+
};
|
|
877
|
+
//# sourceMappingURL=htmlToWordBodyXml-LY6DZSTW.js.map
|