@yinyoudexing/xml2word 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -121,33 +121,76 @@ var init_validateXml = __esm({
121
121
  // src/lib/createDocxZip.ts
122
122
  var createDocxZip_exports = {};
123
123
  __export(createDocxZip_exports, {
124
- createDocxZipUint8Array: () => createDocxZipUint8Array
124
+ createDocxZipUint8Array: () => createDocxZipUint8Array,
125
+ createDocxZipWithAssetsUint8Array: () => createDocxZipWithAssetsUint8Array
125
126
  });
127
/**
 * Builds the `[Content_Types].xml` part of the package.
 *
 * The `rels` and `xml` extensions are always registered. Every asset that
 * carries a `contentType` and whose `target` ends in an alphanumeric
 * extension contributes one more `<Default>` entry, keyed by the lower-cased
 * extension (a later asset with the same extension wins).
 *
 * @param {Array<{target: string, contentType?: string}>} [assets=[]]
 *   Assets whose extensions must be registered.
 * @returns {string} The serialized content-types document.
 */
function buildContentTypesXml(assets = []) {
  // Content types come from caller-supplied data; escape them so a stray
  // quote or ampersand cannot produce malformed XML.
  const escapeAttr = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");

  const defaults = new Map();
  defaults.set("rels", "application/vnd.openxmlformats-package.relationships+xml");
  defaults.set("xml", "application/xml");
  for (const asset of assets) {
    if (!asset.contentType) continue;
    const extMatch = String(asset.target ?? "").match(/\.([a-zA-Z0-9]+)$/);
    if (!extMatch) continue;
    // The extension is restricted to [a-zA-Z0-9] by the regex, so it needs no escaping.
    defaults.set(extMatch[1].toLowerCase(), asset.contentType);
  }

  // Sort by extension so the output is deterministic.
  const defaultLines = [...defaults.entries()]
    .sort((a, b) => a[0].localeCompare(b[0]))
    .map(([ext, ct]) => ` <Default Extension="${ext}" ContentType="${escapeAttr(ct)}"/>`)
    .join("\n");

  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
${defaultLines}
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>
`;
}
146
/**
 * Builds the `word/_rels/document.xml.rels` part: one `<Relationship>`
 * element per asset, in the order given.
 *
 * @param {Array<{relationshipId: string, relationshipType: string, target: string}>} [assets=[]]
 *   Assets referenced from the main document part.
 * @returns {string} The serialized relationships document.
 */
function buildDocumentRelsXml(assets = []) {
  // All three values are caller-supplied and land inside double-quoted
  // attributes, so they must be escaped to keep the XML well-formed.
  const escapeAttr = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");

  const relLines = assets
    .map(
      (a) =>
        ` <Relationship Id="${escapeAttr(a.relationshipId)}" Type="${escapeAttr(a.relationshipType)}" Target="${escapeAttr(a.target)}"/>`
    )
    .join("\n");

  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
${relLines}
</Relationships>
`;
}
126
156
  async function createDocxZipUint8Array(xml, options = {}) {
127
157
  const documentXml = normalizeDocumentXml(xml, options.inputKind ?? "auto");
128
158
  validateXmlIfNeeded(documentXml, options.validateXml ?? true);
129
159
  const zip = new import_jszip.default();
130
- zip.file("[Content_Types].xml", CONTENT_TYPES_XML);
160
+ zip.file("[Content_Types].xml", buildContentTypesXml([]));
161
+ const relsFolder = zip.folder("_rels");
162
+ relsFolder?.file(".rels", ROOT_RELS_XML);
163
+ const wordFolder = zip.folder("word");
164
+ wordFolder?.file("document.xml", documentXml);
165
+ return zip.generateAsync({ type: "uint8array" });
166
+ }
167
/**
 * Packages WordprocessingML plus binary assets (e.g. images) into a .docx
 * archive.
 *
 * Always writes `[Content_Types].xml`, `_rels/.rels` and `word/document.xml`.
 * When at least one asset is supplied it also writes
 * `word/_rels/document.xml.rels` and stores each asset under `word/`.
 *
 * @param {string} xml Document or body XML, interpreted per `options.inputKind`.
 * @param {{inputKind?: string, validateXml?: boolean}} [options={}]
 *   Defaults to `{}` so the argument may be omitted, matching
 *   `createDocxZipUint8Array`.
 * @param {Array<{relationshipId: string, relationshipType: string, target: string, data: *, contentType?: string}>} [assets=[]]
 *   Parts to embed; `target` is the path relative to `word/`.
 * @returns {Promise<Uint8Array>} The zipped package bytes.
 */
async function createDocxZipWithAssetsUint8Array(xml, options = {}, assets = []) {
  const documentXml = normalizeDocumentXml(xml, options.inputKind ?? "auto");
  validateXmlIfNeeded(documentXml, options.validateXml ?? true);

  const zip = new import_jszip.default();
  zip.file("[Content_Types].xml", buildContentTypesXml(assets));
  zip.folder("_rels")?.file(".rels", ROOT_RELS_XML);

  const wordFolder = zip.folder("word");
  wordFolder?.file("document.xml", documentXml);

  if (assets.length) {
    wordFolder?.folder("_rels")?.file("document.xml.rels", buildDocumentRelsXml(assets));
    for (const asset of assets) {
      // Targets are written relative to the `word` folder, so drop a leading
      // "./" and a redundant "word/" prefix before storing the file.
      const targetPath = asset.target.replace(/^\.\//, "");
      const normalized = targetPath.startsWith("word/") ? targetPath.slice("word/".length) : targetPath;
      wordFolder?.file(normalized, asset.data);
    }
  }

  return zip.generateAsync({ type: "uint8array" });
}
137
- var import_jszip, CONTENT_TYPES_XML, ROOT_RELS_XML;
187
+ var import_jszip, ROOT_RELS_XML;
138
188
  var init_createDocxZip = __esm({
139
189
  "src/lib/createDocxZip.ts"() {
140
190
  "use strict";
141
191
  import_jszip = __toESM(require("jszip"), 1);
142
192
  init_normalizeDocumentXml();
143
193
  init_validateXml();
144
- CONTENT_TYPES_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
145
- <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
146
- <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
147
- <Default Extension="xml" ContentType="application/xml"/>
148
- <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
149
- </Types>
150
- `;
151
194
  ROOT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
152
195
  <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
153
196
  <Relationship Id="rId1"
@@ -161,6 +204,7 @@ var init_createDocxZip = __esm({
161
204
  // src/lib/htmlToWordBodyXml.ts
162
205
  var htmlToWordBodyXml_exports = {};
163
206
  __export(htmlToWordBodyXml_exports, {
207
+ htmlToWordBodyWithAssets: () => htmlToWordBodyWithAssets,
164
208
  htmlToWordBodyXml: () => htmlToWordBodyXml,
165
209
  textToWordBodyXml: () => textToWordBodyXml
166
210
  });
@@ -270,7 +314,107 @@ function getTextContent(node) {
270
314
  for (const c of children) out += getTextContent(c);
271
315
  return out;
272
316
  }
273
- function collectInlineRuns(node, inherited, out) {
317
/**
 * Decodes a base64 string into raw bytes.
 *
 * Uses the global `Buffer` when one exists and falls back to the global
 * `atob` otherwise.
 *
 * @param {string} base64 Base64 payload.
 * @returns {Uint8Array} The decoded bytes.
 * @throws {Error} When neither `Buffer` nor `atob` is available.
 */
function decodeBase64ToUint8Array(base64) {
  const { Buffer: NodeBuffer, atob: decodeToBinaryString } = globalThis;
  if (NodeBuffer) {
    return new Uint8Array(NodeBuffer.from(base64, "base64"));
  }
  if (!decodeToBinaryString) {
    throw new Error("Base64 decode is not available in this environment.");
  }
  // atob yields one character per byte; map each back to its numeric value.
  const binary = decodeToBinaryString(base64);
  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
}
331
/**
 * Parses a base64 `data:` URL that holds a PNG or JPEG image.
 *
 * Accepts optional media-type parameters before `;base64` (for example
 * `data:image/png;charset=utf-8;base64,...`) and the common `image/jpg`
 * alias, which is reported as `image/jpeg`. Whitespace inside the payload is
 * removed before decoding.
 *
 * @param {string} src Candidate data URL.
 * @returns {{contentType: string, data: Uint8Array, extension: string} | undefined}
 *   The decoded image, or `undefined` when `src` is not a supported base64
 *   image data URL or its payload is empty.
 */
function parseImageDataUrl(src) {
  if (typeof src !== "string") return void 0;
  const m = src.trim().match(/^data:image\/(png|jpe?g)(?:;[^;,]+)*;base64,([\s\S]+)$/i);
  if (!m) return void 0;
  const isPng = m[1].toLowerCase() === "png";
  const base64 = m[2].replace(/\s+/g, "");
  // A payload made only of whitespace would decode to a zero-byte image.
  if (!base64) return void 0;
  const data = decodeBase64ToUint8Array(base64);
  return {
    contentType: isPng ? "image/png" : "image/jpeg",
    data,
    extension: isPng ? "png" : "jpeg"
  };
}
340
/**
 * Converts an absolute CSS length to whole pixels.
 *
 * Supports `px`, `pt`, `pc`, `in`, `cm` and `mm`, using the fixed CSS ratios
 * (1in = 96px = 72pt = 6pc = 2.54cm). Relative units, percentages, keywords
 * and unitless numbers are not converted.
 *
 * @param {string | undefined} value Raw CSS value, e.g. `"120px"` or `"2in"`.
 * @returns {number | undefined} The length rounded to the nearest pixel and
 *   clamped to at least 1, or `undefined` when the value is missing or not a
 *   supported absolute length.
 */
function parseCssLengthToPx(value) {
  if (!value) return void 0;
  const match = String(value)
    .trim()
    .toLowerCase()
    .match(/^(\d+(?:\.\d+)?)(px|pt|pc|in|cm|mm)$/);
  if (!match) return void 0;
  const pxPerUnit = {
    px: 1,
    pt: 96 / 72,
    pc: 16,
    in: 96,
    cm: 96 / 2.54,
    mm: 96 / 25.4
  };
  return Math.max(1, Math.round(Number(match[1]) * pxPerUnit[match[2]]));
}
347
/**
 * Reads an unsigned 32-bit big-endian integer from a byte sequence.
 *
 * @param {ArrayLike<number>} bytes Source bytes.
 * @param {number} offset Index of the most significant byte.
 * @returns {number | undefined} The value, or `undefined` when the four
 *   bytes starting at `offset` do not fit inside `bytes`.
 */
function readUInt32BE(bytes, offset) {
  const outOfRange = offset < 0 || offset + 4 > bytes.length;
  if (outOfRange) return void 0;
  let value = 0;
  for (let i = 0; i < 4; i++) {
    value = (value << 8) | (bytes[offset + i] ?? 0);
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  return value >>> 0;
}
351
/**
 * Extracts the pixel dimensions from the header of a PNG file.
 *
 * Requires the 8-byte PNG signature at offset 0 and an `IHDR` chunk type at
 * offset 12, then reads width and height as big-endian 32-bit integers at
 * offsets 16 and 20. Checking the chunk type prevents returning arbitrary
 * numbers for files whose first chunk is not `IHDR`.
 *
 * @param {Uint8Array} data Raw file bytes.
 * @returns {{widthPx: number, heightPx: number} | undefined} The dimensions,
 *   or `undefined` when the header is missing, malformed, or reports a zero
 *   width or height.
 */
function parsePngDimensions(data) {
  if (data.length < 24) return void 0;
  const signature = [137, 80, 78, 71, 13, 10, 26, 10];
  const ihdrType = [73, 72, 68, 82]; // ASCII "IHDR"
  const matchesAt = (offset, expected) =>
    expected.every((byte, i) => (data[offset + i] ?? 0) === byte);
  if (!matchesAt(0, signature)) return void 0;
  // Bytes 8-11 hold the chunk length; the chunk type follows at 12-15.
  if (!matchesAt(12, ihdrType)) return void 0;
  const widthPx = readUInt32BE(data, 16);
  const heightPx = readUInt32BE(data, 20);
  if (!widthPx || !heightPx) return void 0;
  return { widthPx, heightPx };
}
362
/**
 * Extracts the pixel dimensions from a JPEG file by scanning its marker
 * segments for the first start-of-frame (SOF) segment.
 *
 * Scanning stops at start-of-scan (SOS) or end-of-image (EOI). Markers that
 * carry no length field (TEM, RST0-RST7 and a repeated SOI) are stepped over
 * instead of having the following bytes misread as a segment length.
 *
 * @param {Uint8Array} data Raw file bytes.
 * @returns {{widthPx: number, heightPx: number} | undefined} The dimensions,
 *   or `undefined` when the data is not a JPEG, is truncated, or has no SOF
 *   segment with non-zero dimensions before the scan data.
 */
function parseJpegDimensions(data) {
  if (data.length < 4) return void 0;
  if (data[0] !== 0xff || data[1] !== 0xd8) return void 0;
  let offset = 2;
  while (offset + 4 <= data.length) {
    if (data[offset] !== 0xff) {
      offset++;
      continue;
    }
    // Any number of 0xFF fill bytes may precede the marker code.
    while (offset < data.length && data[offset] === 0xff) offset++;
    if (offset >= data.length) return void 0;
    const marker = data[offset];
    offset++;
    // EOI / SOS: no frame header can follow.
    if (marker === 0xd9 || marker === 0xda) break;
    // TEM, RST0-RST7 and SOI are stand-alone markers without a length field.
    if (marker === 0x01 || (marker >= 0xd0 && marker <= 0xd8)) continue;
    if (offset + 2 > data.length) return void 0;
    const length = (data[offset] << 8) | data[offset + 1];
    if (length < 2 || offset + length > data.length) return void 0;
    // SOF0-SOF15, excluding DHT (0xC4), JPG (0xC8) and DAC (0xCC).
    const isSof =
      marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
    if (isSof) {
      if (offset + 7 > data.length) return void 0;
      // Segment layout: length(2) precision(1) height(2) width(2).
      const heightPx = (data[offset + 3] << 8) | data[offset + 4];
      const widthPx = (data[offset + 5] << 8) | data[offset + 6];
      if (!widthPx || !heightPx) return void 0;
      return { widthPx, heightPx };
    }
    offset += length;
  }
  return void 0;
}
392
/**
 * Dispatches to the dimension parser that matches the image's MIME type.
 *
 * @param {string} contentType `"image/png"` or `"image/jpeg"`.
 * @param {Uint8Array} data Raw image bytes.
 * @returns {{widthPx: number, heightPx: number} | undefined} Whatever the
 *   format-specific parser returns, or `undefined` for any other type.
 */
function parseIntrinsicImageSizePx(contentType, data) {
  switch (contentType) {
    case "image/png":
      return parsePngDimensions(data);
    case "image/jpeg":
      return parseJpegDimensions(data);
    default:
      return void 0;
  }
}
397
/**
 * Shrinks a size so it fits inside a bounding box while keeping its aspect
 * ratio. Sizes that already fit are never enlarged.
 *
 * @param {{widthPx: number, heightPx: number}} size Requested size.
 * @param {{maxWidthPx: number, maxHeightPx: number}} maxBox Bounding box.
 * @returns {{widthPx: number, heightPx: number}} Whole-pixel size, each side
 *   at least 1.
 */
function applyMaxBoxPx(size, maxBox) {
  const toWholePx = (value) => Math.max(1, Math.round(value));
  const width = toWholePx(size.widthPx);
  const height = toWholePx(size.heightPx);
  // The most restrictive axis decides the factor; capped at 1 so nothing grows.
  const shrink = Math.min(1, maxBox.maxWidthPx / width, maxBox.maxHeightPx / height);
  return {
    widthPx: toWholePx(width * shrink),
    heightPx: toWholePx(height * shrink)
  };
}
403
/**
 * Decides the rendered pixel size of an image element.
 *
 * Each dimension is taken from, in order of precedence: the inline CSS
 * `width`/`height`, the HTML `width`/`height` attribute, the intrinsic image
 * size, and finally a 300x150 fallback. When only one dimension is given
 * explicitly, the other is derived from the aspect ratio (intrinsic ratio,
 * else the attribute ratio, else 0.5). The result is clamped to a 624x864 box.
 *
 * Attribute-based derivation runs only when no CSS dimension is present, so
 * an explicit CSS width and height pair is never overridden by a lone
 * attribute.
 *
 * @param {{attribs?: Record<string, string>}} node Parsed element.
 * @param {{widthPx: number, heightPx: number} | undefined} intrinsic
 *   Dimensions read from the image data, if known.
 * @returns {{widthPx: number, heightPx: number}} Final size in whole pixels.
 */
function computeImageSizePx(node, intrinsic) {
  // Numeric attribute -> whole pixels; non-numeric, zero or missing -> undefined.
  const attrToPx = (raw) => {
    const parsed = raw ? Number(raw) : void 0;
    return Number.isFinite(parsed) && parsed ? Math.max(1, Math.round(parsed)) : void 0;
  };
  const widthAttrPx = attrToPx(node.attribs?.width);
  const heightAttrPx = attrToPx(node.attribs?.height);

  const css = parseStyleAttribute(node.attribs?.style);
  const wCss = parseCssLengthToPx(css.width);
  const hCss = parseCssLengthToPx(css.height);

  const hasCssW = typeof wCss === "number";
  const hasCssH = typeof hCss === "number";
  const hasAttrW = typeof widthAttrPx === "number";
  const hasAttrH = typeof heightAttrPx === "number";

  // height / width
  const ratio =
    intrinsic && intrinsic.widthPx > 0 && intrinsic.heightPx > 0
      ? intrinsic.heightPx / intrinsic.widthPx
      : widthAttrPx && heightAttrPx
        ? heightAttrPx / widthAttrPx
        : 0.5;

  const widthPx = hasCssW ? wCss : hasAttrW ? widthAttrPx : intrinsic?.widthPx ?? 300;
  const heightPx = hasCssH ? hCss : hasAttrH ? heightAttrPx : intrinsic?.heightPx ?? 150;

  const withDerivedHeight = () => ({ widthPx, heightPx: Math.max(1, Math.round(widthPx * ratio)) });
  const withDerivedWidth = () => ({ widthPx: Math.max(1, Math.round(heightPx / ratio)), heightPx });

  let finalSize;
  if (hasCssW !== hasCssH) {
    // Exactly one CSS dimension: derive the other from the ratio.
    finalSize = hasCssW ? withDerivedHeight() : withDerivedWidth();
  } else if (!hasCssW && intrinsic && hasAttrW !== hasAttrH) {
    // No CSS at all and exactly one attribute: derive the other from the image itself.
    finalSize = hasAttrW ? withDerivedHeight() : withDerivedWidth();
  } else {
    finalSize = { widthPx, heightPx };
  }
  return applyMaxBoxPx(finalSize, { maxWidthPx: 624, maxHeightPx: 864 });
}
417
+ function collectInlineRuns(node, inherited, out, result) {
274
418
  if (node.type === "text") {
275
419
  const text = node.data ?? "";
276
420
  if (text) out.push({ kind: "text", text, style: inherited });
@@ -282,13 +426,57 @@ function collectInlineRuns(node, inherited, out) {
282
426
  out.push({ kind: "br" });
283
427
  return;
284
428
  }
429
+ if (tag === "img") {
430
+ const src = node.attribs?.src;
431
+ if (!src) return;
432
+ const parsed = parseImageDataUrl(src);
433
+ if (!parsed) return;
434
+ const intrinsic = parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
435
+ const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
436
+ const id = result.images.length + 1;
437
+ const relationshipId = `rId${id}`;
438
+ const target = `media/image${id}.${parsed.extension}`;
439
+ result.images.push({
440
+ relationshipId,
441
+ target,
442
+ data: parsed.data,
443
+ contentType: parsed.contentType,
444
+ widthPx,
445
+ heightPx
446
+ });
447
+ out.push({ kind: "image", image: { relationshipId, widthPx, heightPx } });
448
+ return;
449
+ }
450
+ if (tag === "canvas") {
451
+ const dataUrl = node.attribs?.["data-image"] ?? node.attribs?.["data-src"];
452
+ if (!dataUrl) return;
453
+ const parsed = parseImageDataUrl(dataUrl);
454
+ if (!parsed) return;
455
+ const bufferW = node.attribs?.width ? Number(node.attribs.width) : void 0;
456
+ const bufferH = node.attribs?.height ? Number(node.attribs.height) : void 0;
457
+ const intrinsic = Number.isFinite(bufferW) && bufferW && Number.isFinite(bufferH) && bufferH ? { widthPx: Math.max(1, Math.round(bufferW)), heightPx: Math.max(1, Math.round(bufferH)) } : parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
458
+ const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
459
+ const id = result.images.length + 1;
460
+ const relationshipId = `rId${id}`;
461
+ const target = `media/image${id}.${parsed.extension}`;
462
+ result.images.push({
463
+ relationshipId,
464
+ target,
465
+ data: parsed.data,
466
+ contentType: parsed.contentType,
467
+ widthPx,
468
+ heightPx
469
+ });
470
+ out.push({ kind: "image", image: { relationshipId, widthPx, heightPx } });
471
+ return;
472
+ }
285
473
  const next = mergeTextStyle(inherited, styleFromElement(node));
286
474
  const children2 = node.children ?? [];
287
- for (const c of children2) collectInlineRuns(c, next, out);
475
+ for (const c of children2) collectInlineRuns(c, next, out, result);
288
476
  return;
289
477
  }
290
478
  const children = node.children ?? [];
291
- for (const c of children) collectInlineRuns(c, inherited, out);
479
+ for (const c of children) collectInlineRuns(c, inherited, out, result);
292
480
  }
293
481
  function buildRunXml(style, text) {
294
482
  const rPrParts = [];
@@ -309,6 +497,16 @@ function buildRunXml(style, text) {
309
497
  const preserve = shouldPreserveSpace(text) ? ' xml:space="preserve"' : "";
310
498
  return `<w:r>${rPrXml}<w:t${preserve}>${escaped}</w:t></w:r>`;
311
499
  }
500
/**
 * Converts pixels to English Metric Units using 9525 EMU per pixel.
 *
 * @param {number} px Length in pixels.
 * @returns {number} Rounded EMU value, never below 1.
 */
function pxToEmu(px) {
  const emuPerPx = 9525;
  const emu = Math.round(px * emuPerPx);
  return Math.max(1, emu);
}
503
/**
 * Serializes one inline picture as a `<w:r>` run containing a DrawingML
 * `<w:drawing>` element.
 *
 * The numeric suffix of the relationship id is reused as the `wp:docPr` id
 * and in the picture name ("Picture N").
 *
 * @param {{relationshipId: string, widthPx: number, heightPx: number}} image
 *   Image reference and its display size in pixels.
 * @returns {string} The run XML.
 */
function buildImageRunXml(image) {
  const cx = pxToEmu(image.widthPx);
  const cy = pxToEmu(image.heightPx);
  const docPrId = image.relationshipId.replace(/^rId/, "");
  const safeName = escapeXmlText(`Picture ${docPrId}`);
  const parts = [
    "<w:r>",
    '<w:drawing xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">',
    '<wp:inline distT="0" distB="0" distL="0" distR="0">',
    `<wp:extent cx="${cx}" cy="${cy}"/>`,
    `<wp:docPr id="${docPrId}" name="${safeName}"/>`,
    "<a:graphic>",
    '<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">',
    "<pic:pic>",
    "<pic:nvPicPr>",
    `<pic:cNvPr id="0" name="${safeName}"/>`,
    "<pic:cNvPicPr/>",
    "</pic:nvPicPr>",
    "<pic:blipFill>",
    `<a:blip r:embed="${image.relationshipId}"/>`,
    "<a:stretch><a:fillRect/></a:stretch>",
    "</pic:blipFill>",
    "<pic:spPr>",
    `<a:xfrm><a:off x="0" y="0"/><a:ext cx="${cx}" cy="${cy}"/></a:xfrm>`,
    '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>',
    "</pic:spPr>",
    "</pic:pic>",
    "</a:graphicData>",
    "</a:graphic>",
    "</wp:inline>",
    "</w:drawing>",
    "</w:r>"
  ];
  return parts.join("");
}
312
510
  function hasClass(node, className) {
313
511
  const cls = node.attribs?.class;
314
512
  if (!cls) return false;
@@ -317,7 +515,11 @@ function hasClass(node, className) {
317
515
  function isSkippableSubtree(node) {
318
516
  if (node.type !== "tag") return false;
319
517
  const tag = node.name?.toLowerCase();
320
- if (tag === "button" || tag === "canvas") return true;
518
+ if (tag === "button") return true;
519
+ if (tag === "canvas") {
520
+ const dataUrl = node.attribs?.["data-image"] ?? node.attribs?.["data-src"];
521
+ if (!dataUrl) return true;
522
+ }
321
523
  if (tag === "img" && hasClass(node, "ProseMirror-separator")) return true;
322
524
  if (node.attribs?.id === "pages") return true;
323
525
  if (hasClass(node, "ProseMirror-widget")) return true;
@@ -409,17 +611,25 @@ function buildParagraphPrXml(node, baseFontHalfPoints, extraInd) {
409
611
  if (!parts.length) return "";
410
612
  return `<w:pPr>${parts.join("")}</w:pPr>`;
411
613
  }
412
- function buildParagraphXmlFromContainer(node, baseStyle, extraInd) {
614
+ function buildParagraphXmlFromContainer(node, baseStyle, extraInd, result) {
413
615
  const baseFontHalfPoints = baseStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? 28;
414
616
  const pPrXml = buildParagraphPrXml(node, baseFontHalfPoints, extraInd);
415
617
  const runs = [];
416
- for (const c of node.children ?? []) collectInlineRuns(c, baseStyle, runs);
618
+ const res = result ?? {
619
+ bodyXml: "",
620
+ images: []
621
+ };
622
+ for (const c of node.children ?? []) collectInlineRuns(c, baseStyle, runs, res);
417
623
  const rXml = [];
418
624
  for (const token of runs) {
419
625
  if (token.kind === "br") {
420
626
  rXml.push("<w:r><w:br/></w:r>");
421
627
  continue;
422
628
  }
629
+ if (token.kind === "image") {
630
+ rXml.push(buildImageRunXml(token.image));
631
+ continue;
632
+ }
423
633
  const text = token.text;
424
634
  if (!text) continue;
425
635
  if (!text.trim()) continue;
@@ -446,7 +656,7 @@ function buildHeadingBaseStyle(level) {
446
656
  const size = level === 1 ? 44 : level === 2 ? 32 : level === 3 ? 28 : level === 4 ? 24 : 22;
447
657
  return { bold: true, fontSizeHalfPoints: size };
448
658
  }
449
- function buildListBlocks(listNode, ordered) {
659
+ function buildListBlocks(listNode, ordered, result) {
450
660
  const items = [];
451
661
  const stack = [...listNode.children ?? []];
452
662
  while (stack.length) {
@@ -460,13 +670,17 @@ function buildListBlocks(listNode, ordered) {
460
670
  const baseStyle = {};
461
671
  const runs = [];
462
672
  runs.push({ kind: "text", text: prefix, style: baseStyle });
463
- for (const c of li.children ?? []) collectInlineRuns(c, baseStyle, runs);
673
+ for (const c of li.children ?? []) collectInlineRuns(c, baseStyle, runs, result);
464
674
  const rXml = [];
465
675
  for (const token of runs) {
466
676
  if (token.kind === "br") {
467
677
  rXml.push("<w:r><w:br/></w:r>");
468
678
  continue;
469
679
  }
680
+ if (token.kind === "image") {
681
+ rXml.push(buildImageRunXml(token.image));
682
+ continue;
683
+ }
470
684
  const text = token.text;
471
685
  if (!text) continue;
472
686
  if (!text.trim()) continue;
@@ -481,7 +695,7 @@ function buildListBlocks(listNode, ordered) {
481
695
  }
482
696
  return out;
483
697
  }
484
- function buildTableXml(tableNode) {
698
+ function buildTableXml(tableNode, result) {
485
699
  const rows = [];
486
700
  const stack = [...tableNode.children ?? []];
487
701
  while (stack.length) {
@@ -498,7 +712,7 @@ function buildTableXml(tableNode) {
498
712
  for (const cell of cells) {
499
713
  const isHeader = cell.name === "th";
500
714
  const baseStyle = isHeader ? { bold: true } : {};
501
- const pXml = buildParagraphXmlFromContainer(cell, baseStyle);
715
+ const pXml = buildParagraphXmlFromContainer(cell, baseStyle, void 0, result);
502
716
  const paragraphs = pXml ? pXml : "<w:p/>";
503
717
  cellXml.push(
504
718
  `<w:tc><w:tcPr><w:tcW w:w="0" w:type="auto"/></w:tcPr>${paragraphs}</w:tc>`
@@ -510,7 +724,16 @@ function buildTableXml(tableNode) {
510
724
  const tblGrid = `<w:tblGrid/>`;
511
725
  return `<w:tbl>${tblPr}${tblGrid}${rowXml.join("")}</w:tbl>`;
512
726
  }
513
- function collectBodyBlocks(node, out) {
727
/**
 * Renders a lone inline node as its own paragraph by wrapping it in a
 * synthetic `<p style="text-align: center;">` element and passing that to
 * `buildParagraphXmlFromContainer`.
 *
 * @param {object} node Inline node to wrap.
 * @param {object} baseStyle Text style passed through to the paragraph builder.
 * @param {object} result Conversion accumulator passed through unchanged.
 * @returns {*} Whatever `buildParagraphXmlFromContainer` returns.
 */
function buildParagraphXmlFromSingleInlineNode(node, baseStyle, result) {
  const centeredParagraph = {
    type: "tag",
    name: "p",
    attribs: { style: "text-align: center;" },
    children: [node]
  };
  const noExtraIndent = void 0;
  return buildParagraphXmlFromContainer(centeredParagraph, baseStyle, noExtraIndent, result);
}
736
+ function collectBodyBlocks(node, out, result) {
514
737
  if (isSkippableSubtree(node)) return;
515
738
  if (node.type === "tag") {
516
739
  const tag = node.name?.toLowerCase();
@@ -519,27 +742,32 @@ function collectBodyBlocks(node, out) {
519
742
  return;
520
743
  }
521
744
  if (tag === "p") {
522
- const pXml = buildParagraphXmlFromContainer(node, {});
745
+ const pXml = buildParagraphXmlFromContainer(node, {}, void 0, result);
746
+ if (pXml) out.push(pXml);
747
+ return;
748
+ }
749
+ if (tag === "img" || tag === "canvas") {
750
+ const pXml = buildParagraphXmlFromSingleInlineNode(node, {}, result);
523
751
  if (pXml) out.push(pXml);
524
752
  return;
525
753
  }
526
754
  if (tag && /^h[1-6]$/.test(tag)) {
527
755
  const level = Number(tag.slice(1));
528
- const hXml = buildParagraphXmlFromContainer(node, buildHeadingBaseStyle(level));
756
+ const hXml = buildParagraphXmlFromContainer(node, buildHeadingBaseStyle(level), void 0, result);
529
757
  if (hXml) out.push(hXml);
530
758
  return;
531
759
  }
532
760
  if (tag === "table") {
533
- const tblXml = buildTableXml(node);
761
+ const tblXml = buildTableXml(node, result);
534
762
  if (tblXml) out.push(tblXml);
535
763
  return;
536
764
  }
537
765
  if (tag === "ul" || tag === "ol") {
538
- out.push(...buildListBlocks(node, tag === "ol"));
766
+ out.push(...buildListBlocks(node, tag === "ol", result));
539
767
  return;
540
768
  }
541
769
  }
542
- for (const c of node.children ?? []) collectBodyBlocks(c, out);
770
+ for (const c of node.children ?? []) collectBodyBlocks(c, out, result);
543
771
  }
544
772
  function textToWordBodyXml(text) {
545
773
  const normalized = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
@@ -557,20 +785,46 @@ function textToWordBodyXml(text) {
557
785
  }
558
786
  return out.join("");
559
787
  }
560
- function htmlToWordBodyXml(html) {
788
/**
 * Converts HTML into Word body XML and collects the images found on the way.
 *
 * Line endings are normalized to "\n" before parsing. No fallback is applied
 * here: when no block is produced, `bodyXml` is the empty string.
 *
 * @param {string} html Source markup.
 * @returns {{bodyXml: string, images: Array}} Concatenated block XML plus the
 *   images pushed by `collectBodyBlocks`.
 */
function htmlToWordBody(html) {
  // "\r\n" and a lone "\r" both become "\n".
  const unixNewlines = html.replace(/\r\n?/g, "\n");
  const parserOptions = {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true
  };
  const documentNode = (0, import_htmlparser2.parseDocument)(unixNewlines, parserOptions);
  const result = { bodyXml: "", images: [] };
  const blocks = [];
  collectBodyBlocks(documentNode, blocks, result);
  result.bodyXml = blocks.join("");
  return result;
}
801
/**
 * Converts HTML into Word body XML, discarding any collected images.
 *
 * Delegates to `htmlToWordBodyWithAssets`, which runs the same conversion
 * and applies the same plain-text fallback when no block XML is produced.
 *
 * @param {string} html Source markup.
 * @returns {string} Word body XML.
 */
function htmlToWordBodyXml(html) {
  return htmlToWordBodyWithAssets(html).bodyXml;
}
815
/**
 * Converts HTML into Word body XML together with the images it references.
 *
 * When the structured conversion yields no XML, the markup is parsed again,
 * its text content is extracted and rendered through `textToWordBodyXml`,
 * and an empty image list is returned.
 *
 * @param {string} html Source markup.
 * @returns {{bodyXml: string, images: Array}} Body XML and collected images.
 */
function htmlToWordBodyWithAssets(html) {
  const converted = htmlToWordBody(html);
  if (converted.bodyXml) return converted;

  // Nothing block-level was produced: fall back to the document's plain text.
  const reparsed = (0, import_htmlparser2.parseDocument)(html, {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true
  });
  const plainText = getTextContent(reparsed);
  return { bodyXml: textToWordBodyXml(plainText), images: [] };
}
575
829
  var import_htmlparser2, PAGE_BREAK_XML;
576
830
  var init_htmlToWordBodyXml = __esm({
@@ -618,13 +872,36 @@ async function xmlToDocxBuffer(xml, options = {}) {
618
872
  return BufferCtor.from(docx);
619
873
  }
620
874
  async function htmlToDocxUint8Array(html, options = {}) {
621
- const { htmlToWordBodyXml: htmlToWordBodyXml2, textToWordBodyXml: textToWordBodyXml2 } = await Promise.resolve().then(() => (init_htmlToWordBodyXml(), htmlToWordBodyXml_exports));
875
+ const { htmlToWordBodyWithAssets: htmlToWordBodyWithAssets2, htmlToWordBodyXml: htmlToWordBodyXml2, textToWordBodyXml: textToWordBodyXml2 } = await Promise.resolve().then(() => (init_htmlToWordBodyXml(), htmlToWordBodyXml_exports));
876
+ const { createDocxZipWithAssetsUint8Array: createDocxZipWithAssetsUint8Array2 } = await Promise.resolve().then(() => (init_createDocxZip(), createDocxZip_exports));
622
877
  const format = options.inputFormat ?? "auto";
623
- const bodyXml = format === "html" ? htmlToWordBodyXml2(html) : format === "text" ? textToWordBodyXml2(html) : looksLikeHtml(html) ? htmlToWordBodyXml2(html) : textToWordBodyXml2(html);
624
- return xmlToDocxUint8Array(bodyXml, {
625
- inputKind: "body",
626
- validateXml: options.validateXml
627
- });
878
+ const isHtml = format === "html" ? true : format === "text" ? false : looksLikeHtml(html) ? true : false;
879
+ if (!isHtml) {
880
+ const bodyXml2 = textToWordBodyXml2(html);
881
+ return xmlToDocxUint8Array(bodyXml2, {
882
+ inputKind: "body",
883
+ validateXml: options.validateXml
884
+ });
885
+ }
886
+ const { bodyXml, images } = htmlToWordBodyWithAssets2(html);
887
+ if (!images.length) {
888
+ return xmlToDocxUint8Array(bodyXml || htmlToWordBodyXml2(html), {
889
+ inputKind: "body",
890
+ validateXml: options.validateXml
891
+ });
892
+ }
893
+ const assets = images.map((img) => ({
894
+ relationshipId: img.relationshipId,
895
+ relationshipType: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",
896
+ target: img.target,
897
+ data: img.data,
898
+ contentType: img.contentType
899
+ }));
900
+ return createDocxZipWithAssetsUint8Array2(
901
+ bodyXml,
902
+ { inputKind: "body", validateXml: options.validateXml },
903
+ assets
904
+ );
628
905
  }
629
906
  async function htmlToDocxBlob(html, options = {}) {
630
907
  const docx = await htmlToDocxUint8Array(html, options);