@yinyoudexing/xml2word 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{createDocxZip-WVDRDYZT.js → createDocxZip-WGQSPGIF.js} +49 -6
- package/dist/createDocxZip-WGQSPGIF.js.map +1 -0
- package/dist/{htmlToWordBodyXml-RFBPSL2Q.js → htmlToWordBodyXml-AG3GTZEZ.js} +233 -22
- package/dist/htmlToWordBodyXml-AG3GTZEZ.js.map +1 -0
- package/dist/index.cjs +314 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +30 -7
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/createDocxZip-WVDRDYZT.js.map +0 -1
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -121,33 +121,76 @@ var init_validateXml = __esm({
|
|
|
121
121
|
// src/lib/createDocxZip.ts
|
|
122
122
|
var createDocxZip_exports = {};
|
|
123
123
|
__export(createDocxZip_exports, {
|
|
124
|
-
createDocxZipUint8Array: () => createDocxZipUint8Array
|
|
124
|
+
createDocxZipUint8Array: () => createDocxZipUint8Array,
|
|
125
|
+
createDocxZipWithAssetsUint8Array: () => createDocxZipWithAssetsUint8Array
|
|
125
126
|
});
|
|
127
|
+
/**
 * Builds the `[Content_Types].xml` part of the package.
 *
 * The `rels` and `xml` defaults are always declared. Each asset then
 * contributes one `Default` entry keyed by the lower-cased file extension of
 * its `target`; assets without a `contentType`, or whose target has no
 * alphanumeric extension, are ignored. When several assets share an
 * extension, the last one's content type wins. Entries are emitted sorted by
 * extension.
 *
 * @param {Array<{ target: string, contentType?: string }>} assets
 * @returns {string} The serialized content-types XML document.
 */
function buildContentTypesXml(assets) {
  // Values land inside double-quoted XML attributes, so markup-significant
  // characters must be escaped to keep the part well-formed.
  const escapeAttr = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
  const defaults = /* @__PURE__ */ new Map();
  defaults.set("rels", "application/vnd.openxmlformats-package.relationships+xml");
  defaults.set("xml", "application/xml");
  for (const asset of assets) {
    if (!asset.contentType) continue;
    const extMatch = asset.target.match(/\.([a-zA-Z0-9]+)$/);
    if (!extMatch) continue;
    const ext = extMatch[1].toLowerCase();
    defaults.set(ext, asset.contentType);
  }
  const defaultLines = [...defaults.entries()]
    .sort((a, b) => a[0].localeCompare(b[0]))
    .map(([ext, ct]) => `  <Default Extension="${escapeAttr(ext)}" ContentType="${escapeAttr(ct)}"/>`)
    .join("\n");
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
${defaultLines}
  <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>
`;
}
|
|
146
|
+
/**
 * Builds the relationships part listing one `Relationship` element per asset.
 *
 * @param {Array<{ relationshipId: string, relationshipType: string, target: string }>} assets
 * @returns {string} The serialized relationships XML document.
 */
function buildDocumentRelsXml(assets) {
  // Ids, types and targets are written into double-quoted XML attributes;
  // escape them so a value such as "a&b.png" cannot break the document.
  const escapeAttr = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
  const relLines = assets
    .map(
      (a) =>
        `  <Relationship Id="${escapeAttr(a.relationshipId)}" Type="${escapeAttr(a.relationshipType)}" Target="${escapeAttr(a.target)}"/>`
    )
    .join("\n");
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
${relLines}
</Relationships>
`;
}
|
|
126
156
|
/**
 * Packages a document XML string into a zip archive containing
 * `[Content_Types].xml`, `_rels/.rels` and `word/document.xml`.
 *
 * @param {string} xml Input XML, normalized via `normalizeDocumentXml`.
 * @param {{ inputKind?: string, validateXml?: boolean }} [options]
 *   `inputKind` falls back to "auto" and `validateXml` to `true` when nullish.
 * @returns {Promise<Uint8Array>} The generated archive bytes.
 */
async function createDocxZipUint8Array(xml, options = {}) {
  const inputKind = options.inputKind ?? "auto";
  const documentXml = normalizeDocumentXml(xml, inputKind);
  const shouldValidate = options.validateXml ?? true;
  validateXmlIfNeeded(documentXml, shouldValidate);

  const archive = new import_jszip.default();
  // No assets here, so only the built-in content-type defaults are declared.
  archive.file("[Content_Types].xml", buildContentTypesXml([]));
  archive.folder("_rels")?.file(".rels", ROOT_RELS_XML);
  archive.folder("word")?.file("document.xml", documentXml);
  return archive.generateAsync({ type: "uint8array" });
}
|
|
167
|
+
/**
 * Packages a document XML string plus binary assets into a zip archive.
 *
 * In addition to `[Content_Types].xml`, `_rels/.rels` and
 * `word/document.xml`, a non-empty `assets` list produces
 * `word/_rels/document.xml.rels` and one file per asset under `word/`.
 *
 * @param {string} xml Input XML, normalized via `normalizeDocumentXml`.
 * @param {{ inputKind?: string, validateXml?: boolean }} [options]
 *   Defaults to `{}` so the function can be called like
 *   `createDocxZipUint8Array` without throwing on a missing options object.
 * @param {Array<{ relationshipId: string, relationshipType: string, target: string, data: *, contentType?: string }>} [assets]
 *   Defaults to an empty list.
 * @returns {Promise<Uint8Array>} The generated archive bytes.
 */
async function createDocxZipWithAssetsUint8Array(xml, options = {}, assets = []) {
  const documentXml = normalizeDocumentXml(xml, options.inputKind ?? "auto");
  validateXmlIfNeeded(documentXml, options.validateXml ?? true);
  const zip = new import_jszip.default();
  zip.file("[Content_Types].xml", buildContentTypesXml(assets));
  const relsFolder = zip.folder("_rels");
  relsFolder?.file(".rels", ROOT_RELS_XML);
  const wordFolder = zip.folder("word");
  wordFolder?.file("document.xml", documentXml);
  if (assets.length) {
    const wordRelsFolder = wordFolder?.folder("_rels");
    wordRelsFolder?.file("document.xml.rels", buildDocumentRelsXml(assets));
    for (const asset of assets) {
      // Targets are written relative to the `word` folder: drop a leading
      // "./" and a leading "word/" prefix before adding the file.
      const targetPath = asset.target.replace(/^\.\//, "");
      const normalized = targetPath.startsWith("word/") ? targetPath.slice("word/".length) : targetPath;
      wordFolder?.file(normalized, asset.data);
    }
  }
  return zip.generateAsync({ type: "uint8array" });
}
|
|
137
|
-
var import_jszip,
|
|
187
|
+
var import_jszip, ROOT_RELS_XML;
|
|
138
188
|
var init_createDocxZip = __esm({
|
|
139
189
|
"src/lib/createDocxZip.ts"() {
|
|
140
190
|
"use strict";
|
|
141
191
|
import_jszip = __toESM(require("jszip"), 1);
|
|
142
192
|
init_normalizeDocumentXml();
|
|
143
193
|
init_validateXml();
|
|
144
|
-
CONTENT_TYPES_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
145
|
-
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
|
|
146
|
-
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
|
|
147
|
-
<Default Extension="xml" ContentType="application/xml"/>
|
|
148
|
-
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
|
|
149
|
-
</Types>
|
|
150
|
-
`;
|
|
151
194
|
ROOT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
152
195
|
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
|
153
196
|
<Relationship Id="rId1"
|
|
@@ -161,6 +204,7 @@ var init_createDocxZip = __esm({
|
|
|
161
204
|
// src/lib/htmlToWordBodyXml.ts
|
|
162
205
|
var htmlToWordBodyXml_exports = {};
|
|
163
206
|
__export(htmlToWordBodyXml_exports, {
|
|
207
|
+
htmlToWordBodyWithAssets: () => htmlToWordBodyWithAssets,
|
|
164
208
|
htmlToWordBodyXml: () => htmlToWordBodyXml,
|
|
165
209
|
textToWordBodyXml: () => textToWordBodyXml
|
|
166
210
|
});
|
|
@@ -270,7 +314,107 @@ function getTextContent(node) {
|
|
|
270
314
|
for (const c of children) out += getTextContent(c);
|
|
271
315
|
return out;
|
|
272
316
|
}
|
|
273
|
-
function
|
|
317
|
+
/**
 * Decodes a base64 string into a plain `Uint8Array`.
 *
 * Uses `globalThis.Buffer` when it exists, otherwise `globalThis.atob`.
 *
 * @param {string} base64
 * @returns {Uint8Array}
 * @throws {Error} When neither `Buffer` nor `atob` is available.
 */
function decodeBase64ToUint8Array(base64) {
  const { Buffer: NodeBuffer, atob: decodeBinary } = globalThis;
  if (NodeBuffer) {
    // Copy into a fresh Uint8Array so callers never receive a Buffer instance.
    return new Uint8Array(NodeBuffer.from(base64, "base64"));
  }
  if (!decodeBinary) {
    throw new Error("Base64 decode is not available in this environment.");
  }
  const binary = decodeBinary(base64);
  const decoded = new Uint8Array(binary.length);
  let index = 0;
  while (index < binary.length) {
    decoded[index] = binary.charCodeAt(index);
    index += 1;
  }
  return decoded;
}
|
|
331
|
+
/**
 * Parses a base64 `data:` URL holding a PNG or JPEG image.
 *
 * @param {string} src Candidate data URL.
 * @returns {{ contentType: string, data: Uint8Array, extension: string } | undefined}
 *   The lower-cased content type, decoded bytes and file extension ("png" or
 *   "jpeg"); `undefined` when `src` is not a matching data URL.
 */
function parseImageDataUrl(src) {
  const parts = src.match(/^data:(image\/png|image\/jpeg);base64,([\s\S]+)$/i);
  if (!parts) {
    return void 0;
  }
  const [, mimeType, payload] = parts;
  const contentType = mimeType.toLowerCase();
  // Whitespace inside the payload is removed before decoding.
  const data = decodeBase64ToUint8Array(payload.replace(/\s+/g, ""));
  let extension = "jpeg";
  if (contentType === "image/png") {
    extension = "png";
  }
  return { contentType, data, extension };
}
|
|
340
|
+
/**
 * Converts a CSS length written in `px` into a whole number of pixels.
 *
 * @param {string | undefined} value CSS value such as "120px" (case and
 *   surrounding whitespace are ignored).
 * @returns {number | undefined} The rounded pixel count, never below 1, or
 *   `undefined` for empty input and for anything that is not a plain
 *   non-negative `px` length.
 */
function parseCssLengthToPx(value) {
  if (!value) {
    return void 0;
  }
  const cleaned = value.trim().toLowerCase();
  const pxMatch = /^(\d+(?:\.\d+)?)px$/.exec(cleaned);
  if (pxMatch === null) {
    return void 0;
  }
  const rounded = Math.round(Number(pxMatch[1]));
  return Math.max(1, rounded);
}
|
|
347
|
+
/**
 * Reads four consecutive bytes as an unsigned big-endian 32-bit integer.
 *
 * @param {ArrayLike<number>} bytes
 * @param {number} offset Index of the most significant byte.
 * @returns {number | undefined} The value, or `undefined` when the four
 *   bytes do not lie entirely inside `bytes`.
 */
function readUInt32BE(bytes, offset) {
  const inBounds = offset >= 0 && offset + 4 <= bytes.length;
  if (!inBounds) {
    return void 0;
  }
  let value = 0;
  for (let i = 0; i < 4; i++) {
    value = (value << 8) | (bytes[offset + i] ?? 0);
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  return value >>> 0;
}
|
|
351
|
+
/**
 * Reads the pixel dimensions from PNG bytes.
 *
 * The first eight bytes must equal the PNG signature; width and height are
 * then read as big-endian 32-bit integers at byte offsets 16 and 20.
 *
 * @param {ArrayLike<number>} data
 * @returns {{ widthPx: number, heightPx: number } | undefined} `undefined`
 *   when the data is shorter than 24 bytes, the signature does not match, or
 *   either dimension is zero.
 */
function parsePngDimensions(data) {
  const PNG_SIGNATURE = [137, 80, 78, 71, 13, 10, 26, 10];
  if (data.length < 24) {
    return void 0;
  }
  const signatureMatches = PNG_SIGNATURE.every((expected, index) => (data[index] ?? 0) === expected);
  if (!signatureMatches) {
    return void 0;
  }
  const widthPx = readUInt32BE(data, 16);
  const heightPx = readUInt32BE(data, 20);
  return widthPx && heightPx ? { widthPx, heightPx } : void 0;
}
|
|
362
|
+
/**
 * Reads the pixel dimensions from JPEG bytes by walking the marker segments
 * until a start-of-frame (SOF) segment is found.
 *
 * @param {ArrayLike<number>} data
 * @returns {{ widthPx: number, heightPx: number } | undefined} `undefined`
 *   when the data does not start with the SOI marker, is truncated or
 *   malformed, reaches EOI/SOS before any SOF segment, or declares a zero
 *   dimension.
 */
function parseJpegDimensions(data) {
  if (data.length < 4) return void 0;
  // Every JPEG stream starts with the SOI marker FF D8.
  if (data[0] !== 0xff || data[1] !== 0xd8) return void 0;
  let offset = 2;
  while (offset + 4 <= data.length) {
    if (data[offset] !== 0xff) {
      // Not at a marker prefix: resynchronize byte by byte.
      offset++;
      continue;
    }
    // Any number of 0xFF fill bytes may precede the marker code.
    while (offset < data.length && data[offset] === 0xff) offset++;
    if (offset >= data.length) return void 0;
    const marker = data[offset];
    offset++;
    // EOI ends the image and SOS starts entropy-coded data; no SOF can follow.
    if (marker === 0xd9 || marker === 0xda) break;
    // TEM (0x01), RST0-RST7 (0xD0-0xD7) and SOI (0xD8) are standalone markers
    // with no length field; reading one would misinterpret the next marker.
    if (marker === 0x01 || (marker >= 0xd0 && marker <= 0xd8)) continue;
    if (offset + 2 > data.length) return void 0;
    // The segment length is big-endian and counts its own two bytes.
    const length = (data[offset] << 8) | data[offset + 1];
    if (length < 2 || offset + length > data.length) return void 0;
    // SOF0-SOF15, excluding DHT (0xC4), JPG (0xC8) and DAC (0xCC).
    const isSof = marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
    if (isSof) {
      if (offset + 7 > data.length) return void 0;
      // Layout after the length field: precision (1), height (2), width (2).
      const heightPx = (data[offset + 3] << 8) | data[offset + 4];
      const widthPx = (data[offset + 5] << 8) | data[offset + 6];
      if (!widthPx || !heightPx) return void 0;
      return { widthPx, heightPx };
    }
    offset += length;
  }
  return void 0;
}
|
|
392
|
+
/**
 * Dispatches to the PNG or JPEG dimension parser based on the content type.
 *
 * @param {string} contentType "image/png" or "image/jpeg".
 * @param {ArrayLike<number>} data Raw image bytes.
 * @returns {{ widthPx: number, heightPx: number } | undefined} `undefined`
 *   for any other content type, or when the selected parser returns it.
 */
function parseIntrinsicImageSizePx(contentType, data) {
  switch (contentType) {
    case "image/png":
      return parsePngDimensions(data);
    case "image/jpeg":
      return parseJpegDimensions(data);
    default:
      return void 0;
  }
}
|
|
397
|
+
/**
 * Shrinks a size so it fits inside a bounding box while keeping its aspect
 * ratio; sizes that already fit are only rounded. Never upscales.
 *
 * @param {{ widthPx: number, heightPx: number }} size
 * @param {{ maxWidthPx: number, maxHeightPx: number }} maxBox
 * @returns {{ widthPx: number, heightPx: number }} Whole-pixel dimensions,
 *   each at least 1.
 */
function applyMaxBoxPx(size, maxBox) {
  const toWholePx = (n) => Math.max(1, Math.round(n));
  const width = toWholePx(size.widthPx);
  const height = toWholePx(size.heightPx);
  // The factor is capped at 1 so the image is only ever scaled down.
  const factor = Math.min(1, maxBox.maxWidthPx / width, maxBox.maxHeightPx / height);
  return {
    widthPx: toWholePx(width * factor),
    heightPx: toWholePx(height * factor),
  };
}
|
|
403
|
+
/**
 * Determines the rendered size of an image element in pixels.
 *
 * Per dimension the precedence is: CSS `width`/`height` from the `style`
 * attribute (px values only), then the numeric `width`/`height` attribute,
 * then the intrinsic size, then a 300x150 default. When only one CSS
 * dimension is given, the other is derived from the aspect ratio; the same
 * applies to a single attribute dimension, but only when an intrinsic size is
 * known. The result is finally scaled down to fit a 624x864 box.
 *
 * @param {{ attribs?: Record<string, string> }} node Element node.
 * @param {{ widthPx: number, heightPx: number } | undefined} intrinsic
 * @returns {{ widthPx: number, heightPx: number }}
 */
function computeImageSizePx(node, intrinsic) {
  // Numeric attribute -> whole pixels (min 1); missing, zero or non-numeric -> undefined.
  const attrToPx = (raw) => {
    const parsed = raw ? Number(raw) : void 0;
    return Number.isFinite(parsed) && parsed ? Math.max(1, Math.round(parsed)) : void 0;
  };
  const widthAttrPx = attrToPx(node.attribs?.width);
  const heightAttrPx = attrToPx(node.attribs?.height);
  const css = parseStyleAttribute(node.attribs?.style);
  const wCss = parseCssLengthToPx(css.width);
  const hCss = parseCssLengthToPx(css.height);

  const hasCssWidth = typeof wCss === "number";
  const hasCssHeight = typeof hCss === "number";
  const hasAttrWidth = typeof widthAttrPx === "number";
  const hasAttrHeight = typeof heightAttrPx === "number";

  // Height-to-width ratio: intrinsic first, then both attributes, then 0.5.
  let ratio = 0.5;
  if (intrinsic && intrinsic.widthPx > 0 && intrinsic.heightPx > 0) {
    ratio = intrinsic.heightPx / intrinsic.widthPx;
  } else if (widthAttrPx && heightAttrPx) {
    ratio = heightAttrPx / widthAttrPx;
  }

  let widthPx;
  if (hasCssWidth) widthPx = wCss;
  else if (hasAttrWidth) widthPx = widthAttrPx;
  else widthPx = intrinsic?.widthPx ?? 300;

  let heightPx;
  if (hasCssHeight) heightPx = hCss;
  else if (hasAttrHeight) heightPx = heightAttrPx;
  else heightPx = intrinsic?.heightPx ?? 150;

  const heightFromWidth = () => ({ widthPx, heightPx: Math.max(1, Math.round(widthPx * ratio)) });
  const widthFromHeight = () => ({ widthPx: Math.max(1, Math.round(heightPx / ratio)), heightPx });

  let finalSize;
  if (hasCssWidth && !hasCssHeight) {
    finalSize = heightFromWidth();
  } else if (hasCssHeight && !hasCssWidth) {
    finalSize = widthFromHeight();
  } else if (hasAttrWidth && !hasAttrHeight && intrinsic) {
    finalSize = heightFromWidth();
  } else if (hasAttrHeight && !hasAttrWidth && intrinsic) {
    finalSize = widthFromHeight();
  } else {
    finalSize = { widthPx, heightPx };
  }
  return applyMaxBoxPx(finalSize, { maxWidthPx: 624, maxHeightPx: 864 });
}
|
|
417
|
+
function collectInlineRuns(node, inherited, out, result) {
|
|
274
418
|
if (node.type === "text") {
|
|
275
419
|
const text = node.data ?? "";
|
|
276
420
|
if (text) out.push({ kind: "text", text, style: inherited });
|
|
@@ -282,13 +426,57 @@ function collectInlineRuns(node, inherited, out) {
|
|
|
282
426
|
out.push({ kind: "br" });
|
|
283
427
|
return;
|
|
284
428
|
}
|
|
429
|
+
if (tag === "img") {
|
|
430
|
+
const src = node.attribs?.src;
|
|
431
|
+
if (!src) return;
|
|
432
|
+
const parsed = parseImageDataUrl(src);
|
|
433
|
+
if (!parsed) return;
|
|
434
|
+
const intrinsic = parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
|
|
435
|
+
const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
|
|
436
|
+
const id = result.images.length + 1;
|
|
437
|
+
const relationshipId = `rId${id}`;
|
|
438
|
+
const target = `media/image${id}.${parsed.extension}`;
|
|
439
|
+
result.images.push({
|
|
440
|
+
relationshipId,
|
|
441
|
+
target,
|
|
442
|
+
data: parsed.data,
|
|
443
|
+
contentType: parsed.contentType,
|
|
444
|
+
widthPx,
|
|
445
|
+
heightPx
|
|
446
|
+
});
|
|
447
|
+
out.push({ kind: "image", image: { relationshipId, widthPx, heightPx } });
|
|
448
|
+
return;
|
|
449
|
+
}
|
|
450
|
+
if (tag === "canvas") {
|
|
451
|
+
const dataUrl = node.attribs?.["data-image"] ?? node.attribs?.["data-src"];
|
|
452
|
+
if (!dataUrl) return;
|
|
453
|
+
const parsed = parseImageDataUrl(dataUrl);
|
|
454
|
+
if (!parsed) return;
|
|
455
|
+
const bufferW = node.attribs?.width ? Number(node.attribs.width) : void 0;
|
|
456
|
+
const bufferH = node.attribs?.height ? Number(node.attribs.height) : void 0;
|
|
457
|
+
const intrinsic = Number.isFinite(bufferW) && bufferW && Number.isFinite(bufferH) && bufferH ? { widthPx: Math.max(1, Math.round(bufferW)), heightPx: Math.max(1, Math.round(bufferH)) } : parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
|
|
458
|
+
const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
|
|
459
|
+
const id = result.images.length + 1;
|
|
460
|
+
const relationshipId = `rId${id}`;
|
|
461
|
+
const target = `media/image${id}.${parsed.extension}`;
|
|
462
|
+
result.images.push({
|
|
463
|
+
relationshipId,
|
|
464
|
+
target,
|
|
465
|
+
data: parsed.data,
|
|
466
|
+
contentType: parsed.contentType,
|
|
467
|
+
widthPx,
|
|
468
|
+
heightPx
|
|
469
|
+
});
|
|
470
|
+
out.push({ kind: "image", image: { relationshipId, widthPx, heightPx } });
|
|
471
|
+
return;
|
|
472
|
+
}
|
|
285
473
|
const next = mergeTextStyle(inherited, styleFromElement(node));
|
|
286
474
|
const children2 = node.children ?? [];
|
|
287
|
-
for (const c of children2) collectInlineRuns(c, next, out);
|
|
475
|
+
for (const c of children2) collectInlineRuns(c, next, out, result);
|
|
288
476
|
return;
|
|
289
477
|
}
|
|
290
478
|
const children = node.children ?? [];
|
|
291
|
-
for (const c of children) collectInlineRuns(c, inherited, out);
|
|
479
|
+
for (const c of children) collectInlineRuns(c, inherited, out, result);
|
|
292
480
|
}
|
|
293
481
|
function buildRunXml(style, text) {
|
|
294
482
|
const rPrParts = [];
|
|
@@ -309,6 +497,16 @@ function buildRunXml(style, text) {
|
|
|
309
497
|
const preserve = shouldPreserveSpace(text) ? ' xml:space="preserve"' : "";
|
|
310
498
|
return `<w:r>${rPrXml}<w:t${preserve}>${escaped}</w:t></w:r>`;
|
|
311
499
|
}
|
|
500
|
+
/**
 * Converts pixels to EMUs using 9525 EMU per pixel.
 *
 * @param {number} px
 * @returns {number} The rounded EMU count, never below 1.
 */
function pxToEmu(px) {
  const EMU_PER_PX = 9525;
  const emu = Math.round(px * EMU_PER_PX);
  return emu < 1 ? 1 : emu;
}
|
|
503
|
+
/**
 * Builds a `<w:r>` run containing an inline DrawingML picture that refers to
 * an image relationship.
 *
 * @param {{ relationshipId: string, widthPx: number, heightPx: number }} image
 * @returns {string} The run XML.
 */
function buildImageRunXml(image) {
  const cx = pxToEmu(image.widthPx);
  const cy = pxToEmu(image.heightPx);
  // The docPr id reuses the numeric suffix of the relationship id ("rId3" -> "3").
  const docPrId = image.relationshipId.replace(/^rId/, "");
  const name = `Picture ${docPrId}`;
  // The fragment binds every prefix it uses, including `r` for `r:embed`, so
  // it stays namespace-well-formed regardless of what the enclosing document
  // element declares. NOTE(review): the `w` prefix is still assumed to be
  // bound by the enclosing document — confirm against normalizeDocumentXml.
  const drawingNamespaces = [
    'xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"',
    'xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"',
    'xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
  ].join(" ");
  return `<w:r><w:drawing ${drawingNamespaces}><wp:inline distT="0" distB="0" distL="0" distR="0"><wp:extent cx="${cx}" cy="${cy}"/><wp:docPr id="${docPrId}" name="${escapeXmlText(name)}"/><a:graphic><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic><pic:nvPicPr><pic:cNvPr id="0" name="${escapeXmlText(name)}"/><pic:cNvPicPr/></pic:nvPicPr><pic:blipFill><a:blip r:embed="${image.relationshipId}"/><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="${cx}" cy="${cy}"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr></pic:pic></a:graphicData></a:graphic></wp:inline></w:drawing></w:r>`;
}
|
|
312
510
|
function hasClass(node, className) {
|
|
313
511
|
const cls = node.attribs?.class;
|
|
314
512
|
if (!cls) return false;
|
|
@@ -317,7 +515,11 @@ function hasClass(node, className) {
|
|
|
317
515
|
function isSkippableSubtree(node) {
|
|
318
516
|
if (node.type !== "tag") return false;
|
|
319
517
|
const tag = node.name?.toLowerCase();
|
|
320
|
-
if (tag === "button"
|
|
518
|
+
if (tag === "button") return true;
|
|
519
|
+
if (tag === "canvas") {
|
|
520
|
+
const dataUrl = node.attribs?.["data-image"] ?? node.attribs?.["data-src"];
|
|
521
|
+
if (!dataUrl) return true;
|
|
522
|
+
}
|
|
321
523
|
if (tag === "img" && hasClass(node, "ProseMirror-separator")) return true;
|
|
322
524
|
if (node.attribs?.id === "pages") return true;
|
|
323
525
|
if (hasClass(node, "ProseMirror-widget")) return true;
|
|
@@ -409,17 +611,25 @@ function buildParagraphPrXml(node, baseFontHalfPoints, extraInd) {
|
|
|
409
611
|
if (!parts.length) return "";
|
|
410
612
|
return `<w:pPr>${parts.join("")}</w:pPr>`;
|
|
411
613
|
}
|
|
412
|
-
function buildParagraphXmlFromContainer(node, baseStyle, extraInd) {
|
|
614
|
+
function buildParagraphXmlFromContainer(node, baseStyle, extraInd, result) {
|
|
413
615
|
const baseFontHalfPoints = baseStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? 28;
|
|
414
616
|
const pPrXml = buildParagraphPrXml(node, baseFontHalfPoints, extraInd);
|
|
415
617
|
const runs = [];
|
|
416
|
-
|
|
618
|
+
const res = result ?? {
|
|
619
|
+
bodyXml: "",
|
|
620
|
+
images: []
|
|
621
|
+
};
|
|
622
|
+
for (const c of node.children ?? []) collectInlineRuns(c, baseStyle, runs, res);
|
|
417
623
|
const rXml = [];
|
|
418
624
|
for (const token of runs) {
|
|
419
625
|
if (token.kind === "br") {
|
|
420
626
|
rXml.push("<w:r><w:br/></w:r>");
|
|
421
627
|
continue;
|
|
422
628
|
}
|
|
629
|
+
if (token.kind === "image") {
|
|
630
|
+
rXml.push(buildImageRunXml(token.image));
|
|
631
|
+
continue;
|
|
632
|
+
}
|
|
423
633
|
const text = token.text;
|
|
424
634
|
if (!text) continue;
|
|
425
635
|
if (!text.trim()) continue;
|
|
@@ -446,7 +656,7 @@ function buildHeadingBaseStyle(level) {
|
|
|
446
656
|
const size = level === 1 ? 44 : level === 2 ? 32 : level === 3 ? 28 : level === 4 ? 24 : 22;
|
|
447
657
|
return { bold: true, fontSizeHalfPoints: size };
|
|
448
658
|
}
|
|
449
|
-
function buildListBlocks(listNode, ordered) {
|
|
659
|
+
function buildListBlocks(listNode, ordered, result) {
|
|
450
660
|
const items = [];
|
|
451
661
|
const stack = [...listNode.children ?? []];
|
|
452
662
|
while (stack.length) {
|
|
@@ -460,13 +670,17 @@ function buildListBlocks(listNode, ordered) {
|
|
|
460
670
|
const baseStyle = {};
|
|
461
671
|
const runs = [];
|
|
462
672
|
runs.push({ kind: "text", text: prefix, style: baseStyle });
|
|
463
|
-
for (const c of li.children ?? []) collectInlineRuns(c, baseStyle, runs);
|
|
673
|
+
for (const c of li.children ?? []) collectInlineRuns(c, baseStyle, runs, result);
|
|
464
674
|
const rXml = [];
|
|
465
675
|
for (const token of runs) {
|
|
466
676
|
if (token.kind === "br") {
|
|
467
677
|
rXml.push("<w:r><w:br/></w:r>");
|
|
468
678
|
continue;
|
|
469
679
|
}
|
|
680
|
+
if (token.kind === "image") {
|
|
681
|
+
rXml.push(buildImageRunXml(token.image));
|
|
682
|
+
continue;
|
|
683
|
+
}
|
|
470
684
|
const text = token.text;
|
|
471
685
|
if (!text) continue;
|
|
472
686
|
if (!text.trim()) continue;
|
|
@@ -481,7 +695,7 @@ function buildListBlocks(listNode, ordered) {
|
|
|
481
695
|
}
|
|
482
696
|
return out;
|
|
483
697
|
}
|
|
484
|
-
function buildTableXml(tableNode) {
|
|
698
|
+
function buildTableXml(tableNode, result) {
|
|
485
699
|
const rows = [];
|
|
486
700
|
const stack = [...tableNode.children ?? []];
|
|
487
701
|
while (stack.length) {
|
|
@@ -498,7 +712,7 @@ function buildTableXml(tableNode) {
|
|
|
498
712
|
for (const cell of cells) {
|
|
499
713
|
const isHeader = cell.name === "th";
|
|
500
714
|
const baseStyle = isHeader ? { bold: true } : {};
|
|
501
|
-
const pXml = buildParagraphXmlFromContainer(cell, baseStyle);
|
|
715
|
+
const pXml = buildParagraphXmlFromContainer(cell, baseStyle, void 0, result);
|
|
502
716
|
const paragraphs = pXml ? pXml : "<w:p/>";
|
|
503
717
|
cellXml.push(
|
|
504
718
|
`<w:tc><w:tcPr><w:tcW w:w="0" w:type="auto"/></w:tcPr>${paragraphs}</w:tc>`
|
|
@@ -510,7 +724,16 @@ function buildTableXml(tableNode) {
|
|
|
510
724
|
const tblGrid = `<w:tblGrid/>`;
|
|
511
725
|
return `<w:tbl>${tblPr}${tblGrid}${rowXml.join("")}</w:tbl>`;
|
|
512
726
|
}
|
|
513
|
-
function
|
|
727
|
+
/**
 * Wraps a single inline node in a synthetic centered `<p>` element and
 * renders it through `buildParagraphXmlFromContainer`.
 *
 * @param {object} node Inline node to wrap.
 * @param {object} baseStyle Base text style passed through unchanged.
 * @param {object} result Shared conversion result passed through unchanged.
 * @returns {*} Whatever `buildParagraphXmlFromContainer` returns.
 */
function buildParagraphXmlFromSingleInlineNode(node, baseStyle, result) {
  const centeredParagraph = {
    type: "tag",
    name: "p",
    attribs: { style: "text-align: center;" },
    children: [node],
  };
  const noExtraIndent = void 0;
  return buildParagraphXmlFromContainer(centeredParagraph, baseStyle, noExtraIndent, result);
}
|
|
736
|
+
function collectBodyBlocks(node, out, result) {
|
|
514
737
|
if (isSkippableSubtree(node)) return;
|
|
515
738
|
if (node.type === "tag") {
|
|
516
739
|
const tag = node.name?.toLowerCase();
|
|
@@ -519,27 +742,32 @@ function collectBodyBlocks(node, out) {
|
|
|
519
742
|
return;
|
|
520
743
|
}
|
|
521
744
|
if (tag === "p") {
|
|
522
|
-
const pXml = buildParagraphXmlFromContainer(node, {});
|
|
745
|
+
const pXml = buildParagraphXmlFromContainer(node, {}, void 0, result);
|
|
746
|
+
if (pXml) out.push(pXml);
|
|
747
|
+
return;
|
|
748
|
+
}
|
|
749
|
+
if (tag === "img" || tag === "canvas") {
|
|
750
|
+
const pXml = buildParagraphXmlFromSingleInlineNode(node, {}, result);
|
|
523
751
|
if (pXml) out.push(pXml);
|
|
524
752
|
return;
|
|
525
753
|
}
|
|
526
754
|
if (tag && /^h[1-6]$/.test(tag)) {
|
|
527
755
|
const level = Number(tag.slice(1));
|
|
528
|
-
const hXml = buildParagraphXmlFromContainer(node, buildHeadingBaseStyle(level));
|
|
756
|
+
const hXml = buildParagraphXmlFromContainer(node, buildHeadingBaseStyle(level), void 0, result);
|
|
529
757
|
if (hXml) out.push(hXml);
|
|
530
758
|
return;
|
|
531
759
|
}
|
|
532
760
|
if (tag === "table") {
|
|
533
|
-
const tblXml = buildTableXml(node);
|
|
761
|
+
const tblXml = buildTableXml(node, result);
|
|
534
762
|
if (tblXml) out.push(tblXml);
|
|
535
763
|
return;
|
|
536
764
|
}
|
|
537
765
|
if (tag === "ul" || tag === "ol") {
|
|
538
|
-
out.push(...buildListBlocks(node, tag === "ol"));
|
|
766
|
+
out.push(...buildListBlocks(node, tag === "ol", result));
|
|
539
767
|
return;
|
|
540
768
|
}
|
|
541
769
|
}
|
|
542
|
-
for (const c of node.children ?? []) collectBodyBlocks(c, out);
|
|
770
|
+
for (const c of node.children ?? []) collectBodyBlocks(c, out, result);
|
|
543
771
|
}
|
|
544
772
|
function textToWordBodyXml(text) {
|
|
545
773
|
const normalized = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
|
|
@@ -557,20 +785,46 @@ function textToWordBodyXml(text) {
|
|
|
557
785
|
}
|
|
558
786
|
return out.join("");
|
|
559
787
|
}
|
|
560
|
-
function
|
|
788
|
+
/**
 * Parses HTML and converts it into body XML plus the list of images
 * collected during the conversion.
 *
 * @param {string} html
 * @returns {{ bodyXml: string, images: Array<object> }} `bodyXml` is the
 *   concatenation of all collected block fragments (possibly empty).
 */
function htmlToWordBody(html) {
  // Normalize CRLF and lone CR to LF before parsing.
  const withUnixNewlines = html.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  const parserOptions = {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true,
  };
  const documentNode = (0, import_htmlparser2.parseDocument)(withUnixNewlines, parserOptions);
  const conversion = { bodyXml: "", images: [] };
  const blocks = [];
  collectBodyBlocks(documentNode, blocks, conversion);
  conversion.bodyXml = blocks.join("");
  return conversion;
}
|
|
801
|
+
/**
 * Converts HTML into body XML, discarding any collected image assets.
 *
 * When the structured conversion yields no XML, the document's text content
 * is converted with `textToWordBodyXml` instead; that fallback is performed
 * by `htmlToWordBodyWithAssets`, whose `bodyXml` is returned here.
 *
 * @param {string} html
 * @returns {string}
 */
function htmlToWordBodyXml(html) {
  const { bodyXml } = htmlToWordBodyWithAssets(html);
  return bodyXml;
}
|
|
815
|
+
/**
 * Converts HTML into body XML together with the images collected during the
 * conversion.
 *
 * @param {string} html
 * @returns {{ bodyXml: string, images: Array<object> }} When the structured
 *   conversion yields no XML, `bodyXml` is built from the document's text
 *   content and `images` is empty.
 */
function htmlToWordBodyWithAssets(html) {
  const converted = htmlToWordBody(html);
  if (converted.bodyXml) {
    return converted;
  }
  // Nothing convertible was found: fall back to the plain text content.
  const reparsed = (0, import_htmlparser2.parseDocument)(html, {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true,
  });
  const plainText = getTextContent(reparsed);
  return { bodyXml: textToWordBodyXml(plainText), images: [] };
}
|
|
575
829
|
var import_htmlparser2, PAGE_BREAK_XML;
|
|
576
830
|
var init_htmlToWordBodyXml = __esm({
|
|
@@ -618,13 +872,36 @@ async function xmlToDocxBuffer(xml, options = {}) {
|
|
|
618
872
|
return BufferCtor.from(docx);
|
|
619
873
|
}
|
|
620
874
|
/**
 * Converts HTML or plain text into docx archive bytes.
 *
 * `options.inputFormat` may be "html", "text" or "auto" (the default when
 * nullish); "auto" consults `looksLikeHtml`. Text input and HTML without
 * images go through `xmlToDocxUint8Array`; HTML with images is packaged
 * together with its image assets.
 *
 * @param {string} html Input markup or text.
 * @param {{ inputFormat?: string, validateXml?: boolean }} [options]
 * @returns {Promise<Uint8Array>}
 */
async function htmlToDocxUint8Array(html, options = {}) {
  const {
    htmlToWordBodyWithAssets: htmlToWordBodyWithAssets2,
    htmlToWordBodyXml: htmlToWordBodyXml2,
    textToWordBodyXml: textToWordBodyXml2
  } = await Promise.resolve().then(() => (init_htmlToWordBodyXml(), htmlToWordBodyXml_exports));
  const {
    createDocxZipWithAssetsUint8Array: createDocxZipWithAssetsUint8Array2
  } = await Promise.resolve().then(() => (init_createDocxZip(), createDocxZip_exports));

  const format = options.inputFormat ?? "auto";
  // An explicit format wins; detection only runs for anything else.
  const isHtml = format === "html" || (format !== "text" && Boolean(looksLikeHtml(html)));
  const bodyOptions = { inputKind: "body", validateXml: options.validateXml };

  if (!isHtml) {
    return xmlToDocxUint8Array(textToWordBodyXml2(html), bodyOptions);
  }

  const { bodyXml, images } = htmlToWordBodyWithAssets2(html);
  if (!images.length) {
    return xmlToDocxUint8Array(bodyXml || htmlToWordBodyXml2(html), bodyOptions);
  }

  const assets = images.map(({ relationshipId, target, data, contentType }) => ({
    relationshipId,
    relationshipType: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",
    target,
    data,
    contentType
  }));
  return createDocxZipWithAssetsUint8Array2(bodyXml, bodyOptions, assets);
}
|
|
629
906
|
async function htmlToDocxBlob(html, options = {}) {
|
|
630
907
|
const docx = await htmlToDocxUint8Array(html, options);
|