@yinyoudexing/xml2word 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,6 +71,92 @@ function normalizeFontFamily(value) {
71
71
  if (!first) return void 0;
72
72
  return first.replace(/^["']|["']$/g, "");
73
73
  }
74
/**
 * Reports whether a (lower-cased, whitespace-normalised) selector tail moves
 * the subject of the selector away from the element matched by the prefix.
 *
 * Quoted strings, attribute blocks (`[...]`) and functional pseudo-class
 * arguments (`(...)`) are removed first so that whitespace or `>` inside them
 * is not mistaken for a combinator.
 *
 * @param {string} tail - The part of a selector that follows the target prefix.
 * @returns {boolean} True when the tail contains a combinator or a pseudo-element.
 */
function selectorTailLeavesSubject(tail) {
  let bare = tail.replace(/"[^"]*"|'[^']*'/g, "");
  let previous;
  do {
    // Repeat so nested groups such as `:not(:is(.a, .b))` collapse fully.
    previous = bare;
    bare = bare.replace(/\[[^\[\]]*\]|\([^()]*\)/g, "");
  } while (bare !== previous);
  // Descendant (space), child (>), adjacent (+) and sibling (~) combinators
  // all mean the rule styles some other element than the prefix.
  if (/[\s>+~]/.test(bare)) return true;
  // Pseudo-elements style a generated box, not the element itself.
  return /::|:(?:before|after|first-line|first-letter)(?![\w-])/.test(bare);
}

/**
 * Decides whether any selector in a comma-separated selector list styles the
 * element described by `targetSelector`.
 *
 * A selector is a hit when it equals the target, or when it is the target
 * immediately refined by a class, pseudo-class, id or attribute
 * (`p.note`, `p:first-child`, `p#x`, `p[lang]`). Selectors that merely start
 * with the target but continue with a combinator (`body.dark p`, `p.x > span`)
 * or end in a pseudo-element (`p::before`) are not hits, because their
 * declarations do not apply to the target element.
 *
 * Comparison is case-insensitive; CSS comments in `selectorsText` are ignored.
 *
 * @param {string} selectorsText - Raw selector list of one CSS rule.
 * @param {string} targetSelector - Selector to look for, e.g. `"p"` or `".tableWrapper"`.
 * @returns {boolean} True when at least one selector in the list hits the target.
 */
function cssSelectorsHitTarget(selectorsText, targetSelector) {
  const target = targetSelector.trim().toLowerCase();
  if (!target) return false;
  const selectors = selectorsText
    .replace(/\/\*[\s\S]*?\*\//g, " ")
    .split(",")
    .map((s) => s.replace(/\s+/g, " ").trim().toLowerCase())
    .filter(Boolean);
  return selectors.some((s) => {
    if (s === target) return true;
    if (!s.startsWith(target)) return false;
    const rest = s.slice(target.length);
    // The prefix must be followed directly by a refinement of the same
    // compound selector; anything else (e.g. "pre" for target "p") is unrelated.
    if (!/^[.:#\[]/.test(rest)) return false;
    return !selectorTailLeavesSubject(rest);
  });
}
85
/**
 * Lazily walks the top-level CSS rules found inside every `<style>` element
 * of an HTML string, in document order.
 *
 * Only brace depth is tracked: the text before a top-level `{` is reported as
 * the rule's selector text and everything up to the matching `}` as its
 * declaration text. Block at-rules such as `@media` are therefore yielded as
 * one rule whose "selector" is the at-rule prelude; rules nested inside them
 * are not yielded separately.
 *
 * @param {string} html - HTML source that may contain `<style>` elements.
 * @yields {{ selectorsText: string, decl: string }} Lower-cased, trimmed
 *   selector text and trimmed declaration text of each top-level rule.
 */
function* iterateTopLevelCssRules(html) {
  const styleTagRegex = /<style\b[^>]*>([\s\S]*?)<\/style>/gi;
  let m;
  while ((m = styleTagRegex.exec(html)) !== null) {
    const cssText = m[1] ?? "";
    let depth = 0;
    let preludeStart = 0;
    let declStart = -1;
    for (let i = 0; i < cssText.length; i++) {
      const ch = cssText[i];
      if (ch === "/" && cssText[i + 1] === "*") {
        // Skip comments so braces or semicolons inside them cannot disturb
        // depth tracking or rule boundaries. An unterminated comment
        // swallows the rest of the sheet.
        const end = cssText.indexOf("*/", i + 2);
        if (end === -1) break;
        i = end + 1;
        continue;
      }
      if (ch === "{") {
        if (depth === 0) declStart = i + 1;
        depth++;
        continue;
      }
      if (ch === "}") {
        if (depth > 0) depth--;
        if (depth === 0) {
          if (declStart >= 0) {
            yield {
              selectorsText: cssText.slice(preludeStart, declStart - 1).trim().toLowerCase(),
              decl: cssText.slice(declStart, i).trim()
            };
          }
          declStart = -1;
          preludeStart = i + 1;
        }
        continue;
      }
      if (ch === ";" && depth === 0) {
        // Statement at-rules (`@charset "utf-8";`, `@import ...;`) end here;
        // without this reset they would be glued onto the next rule's
        // selector and that rule would never match.
        preludeStart = i + 1;
      }
    }
  }
}

/**
 * Collects the declarations of every top-level CSS rule in the document's
 * `<style>` elements whose selector list hits `selector`, merged in document
 * order so that later rules override earlier ones property by property.
 *
 * @param {string} html - HTML source to scan.
 * @param {string} selector - Selector to look for (matched case-insensitively).
 * @returns {Object} Merged result of `parseStyleAttribute` for each matching
 *   rule; an empty object when nothing matches or `selector` is blank.
 */
function parseCssRuleFromHtml(html, selector) {
  const merged = {};
  const target = selector.trim().toLowerCase();
  if (!target) return merged;
  for (const { selectorsText, decl } of iterateTopLevelCssRules(html)) {
    if (cssSelectorsHitTarget(selectorsText, target)) {
      Object.assign(merged, parseStyleAttribute(decl));
    }
  }
  return merged;
}

/**
 * Returns the declarations of the first top-level CSS rule (in document
 * order) whose selector list hits `selector`; later matching rules are
 * ignored.
 *
 * @param {string} html - HTML source to scan.
 * @param {string} selector - Selector to look for (matched case-insensitively).
 * @returns {Object} Result of `parseStyleAttribute` for the first matching
 *   rule; an empty object when nothing matches or `selector` is blank.
 */
function parseCssRuleFromHtmlFirst(html, selector) {
  const target = selector.trim().toLowerCase();
  if (!target) return {};
  for (const { selectorsText, decl } of iterateTopLevelCssRules(html)) {
    if (cssSelectorsHitTarget(selectorsText, target)) return parseStyleAttribute(decl);
  }
  return {};
}
74
160
  function mergeTextStyle(base, patch) {
75
161
  return {
76
162
  bold: patch.bold ?? base.bold,
@@ -210,7 +296,7 @@ function applyMaxBoxPx(size, maxBox) {
210
296
  const scale = Math.min(1, maxBox.maxWidthPx / w, maxBox.maxHeightPx / h);
211
297
  return { widthPx: Math.max(1, Math.round(w * scale)), heightPx: Math.max(1, Math.round(h * scale)) };
212
298
  }
213
- function computeImageSizePx(node, intrinsic) {
299
+ function computeImageSizePx(node, intrinsic, maxBox) {
214
300
  const wAttr = node.attribs?.width ? Number(node.attribs.width) : void 0;
215
301
  const hAttr = node.attribs?.height ? Number(node.attribs.height) : void 0;
216
302
  const css = parseStyleAttribute(node.attribs?.style);
@@ -222,9 +308,9 @@ function computeImageSizePx(node, intrinsic) {
222
308
  const widthPx = typeof wCss === "number" ? wCss : typeof widthAttrPx === "number" ? widthAttrPx : intrinsic?.widthPx ?? 300;
223
309
  const heightPx = typeof hCss === "number" ? hCss : typeof heightAttrPx === "number" ? heightAttrPx : intrinsic?.heightPx ?? 150;
224
310
  const finalSize = typeof wCss === "number" && typeof hCss !== "number" ? { widthPx, heightPx: Math.max(1, Math.round(widthPx * ratio)) } : typeof hCss === "number" && typeof wCss !== "number" ? { widthPx: Math.max(1, Math.round(heightPx / ratio)), heightPx } : typeof widthAttrPx === "number" && typeof heightAttrPx !== "number" && intrinsic ? { widthPx, heightPx: Math.max(1, Math.round(widthPx * ratio)) } : typeof heightAttrPx === "number" && typeof widthAttrPx !== "number" && intrinsic ? { widthPx: Math.max(1, Math.round(heightPx / ratio)), heightPx } : { widthPx, heightPx };
225
- return applyMaxBoxPx(finalSize, { maxWidthPx: 624, maxHeightPx: 864 });
311
+ return applyMaxBoxPx(finalSize, maxBox ?? { maxWidthPx: 624, maxHeightPx: 864 });
226
312
  }
227
- function collectInlineRuns(node, inherited, out, result) {
313
+ function collectInlineRuns(node, inherited, out, result, ctx) {
228
314
  if (node.type === "text") {
229
315
  const text = node.data ?? "";
230
316
  if (text) out.push({ kind: "text", text, style: inherited });
@@ -242,7 +328,11 @@ function collectInlineRuns(node, inherited, out, result) {
242
328
  const parsed = parseImageDataUrl(src);
243
329
  if (!parsed) return;
244
330
  const intrinsic = parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
245
- const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
331
+ const { widthPx, heightPx } = computeImageSizePx(
332
+ node,
333
+ intrinsic,
334
+ ctx ? { maxWidthPx: ctx.maxImageWidthPx, maxHeightPx: ctx.maxImageHeightPx } : void 0
335
+ );
246
336
  const id = result.images.length + 1;
247
337
  const relationshipId = `rId${id + IMAGE_RELATIONSHIP_ID_OFFSET}`;
248
338
  const target = `media/image${id}.${parsed.extension}`;
@@ -265,7 +355,11 @@ function collectInlineRuns(node, inherited, out, result) {
265
355
  const bufferW = node.attribs?.width ? Number(node.attribs.width) : void 0;
266
356
  const bufferH = node.attribs?.height ? Number(node.attribs.height) : void 0;
267
357
  const intrinsic = Number.isFinite(bufferW) && bufferW && Number.isFinite(bufferH) && bufferH ? { widthPx: Math.max(1, Math.round(bufferW)), heightPx: Math.max(1, Math.round(bufferH)) } : parseIntrinsicImageSizePx(parsed.contentType, parsed.data);
268
- const { widthPx, heightPx } = computeImageSizePx(node, intrinsic);
358
+ const { widthPx, heightPx } = computeImageSizePx(
359
+ node,
360
+ intrinsic,
361
+ ctx ? { maxWidthPx: ctx.maxImageWidthPx, maxHeightPx: ctx.maxImageHeightPx } : void 0
362
+ );
269
363
  const id = result.images.length + 1;
270
364
  const relationshipId = `rId${id + IMAGE_RELATIONSHIP_ID_OFFSET}`;
271
365
  const target = `media/image${id}.${parsed.extension}`;
@@ -282,11 +376,11 @@ function collectInlineRuns(node, inherited, out, result) {
282
376
  }
283
377
  const next = mergeTextStyle(inherited, styleFromElement(node));
284
378
  const children2 = node.children ?? [];
285
- for (const c of children2) collectInlineRuns(c, next, out, result);
379
+ for (const c of children2) collectInlineRuns(c, next, out, result, ctx);
286
380
  return;
287
381
  }
288
382
  const children = node.children ?? [];
289
- for (const c of children) collectInlineRuns(c, inherited, out, result);
383
+ for (const c of children) collectInlineRuns(c, inherited, out, result, ctx);
290
384
  }
291
385
  function buildRunXml(style, text) {
292
386
  const rPrParts = [];
@@ -348,10 +442,29 @@ function parseCssLengthToTwips(value, baseFontHalfPoints) {
348
442
  const basePt = baseFontHalfPoints / 2;
349
443
  return Math.round(Number(em[1]) * basePt * 20);
350
444
  }
445
+ const rem = v.match(/^(-?\d+(?:\.\d+)?)rem$/);
446
+ if (rem) return Math.round(Number(rem[1]) * 16 * 72 * 20 / 96);
351
447
  const num = v.match(/^(-?\d+(?:\.\d+)?)$/);
352
448
  if (num) return Math.round(Number(num[1]));
353
449
  return void 0;
354
450
  }
451
/**
 * Derives paragraph spacing (before/after) from the vertical margins of a
 * parsed CSS declaration map.
 *
 * Each side is resolved independently: the longhand (`margin-top` /
 * `margin-bottom`) is used when it parses to a length, otherwise the matching
 * component of the `margin` shorthand is used. Declaration order is not
 * available in `css`, so a longhand is always preferred over the shorthand.
 * A trailing `!important` is ignored.
 *
 * Shorthand components follow the CSS rule: with one or two values the first
 * value is both top and bottom; with three or four values the third is bottom.
 *
 * @param {Object} css - Map of CSS property name to raw value.
 * @param {number} baseFontHalfPoints - Font size passed through to
 *   `parseCssLengthToTwips` for resolving relative units.
 * @returns {{ beforeTwips?: number, afterTwips?: number }} Resolved spacing;
 *   an empty object when neither side yields a length.
 */
function extractMarginBeforeAfterTwips(css, baseFontHalfPoints) {
  const stripImportant = (value) => String(value).replace(/\s*!important\s*$/i, "").trim();
  const toTwips = (value) => {
    if (value == null) return void 0;
    const cleaned = stripImportant(value);
    return cleaned ? parseCssLengthToTwips(cleaned, baseFontHalfPoints) : void 0;
  };
  const shorthand = css.margin == null ? "" : stripImportant(css.margin).toLowerCase();
  const tokens = shorthand ? shorthand.split(/\s+/) : [];
  const topToken = tokens[0];
  const bottomToken = tokens.length >= 3 ? tokens[2] : tokens[0];
  const beforeTwips = toTwips(css["margin-top"]) ?? toTwips(topToken);
  const afterTwips = toTwips(css["margin-bottom"]) ?? toTwips(bottomToken);
  if (beforeTwips === void 0 && afterTwips === void 0) return {};
  return { beforeTwips, afterTwips };
}
355
468
  function inferFirstFontSizeHalfPoints(node) {
356
469
  const stack = [node];
357
470
  while (stack.length) {
@@ -368,8 +481,10 @@ function inferFirstFontSizeHalfPoints(node) {
368
481
  }
369
482
  return void 0;
370
483
  }
371
- function buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId) {
372
- const css = parseStyleAttribute(node.attribs?.style);
484
+ function buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId, defaultCss) {
485
+ const tag = node.type === "tag" ? node.name?.toLowerCase() : void 0;
486
+ const inlineCss = parseStyleAttribute(node.attribs?.style);
487
+ const css = defaultCss ? { ...defaultCss, ...inlineCss } : inlineCss;
373
488
  const parts = [];
374
489
  if (pStyleId) parts.push(`<w:pStyle w:val="${escapeXmlText(pStyleId)}"/>`);
375
490
  const shdHex = extractBackgroundFillHex(css);
@@ -396,10 +511,17 @@ function buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId) {
396
511
  if (typeof hangingTwips === "number") indAttrs.push(`w:hanging="${hangingTwips}"`);
397
512
  if (typeof firstLine === "number") indAttrs.push(`w:firstLine="${firstLine}"`);
398
513
  if (indAttrs.length) parts.push(`<w:ind ${indAttrs.join(" ")}/>`);
399
- const before = parseCssLengthToTwips(css["margin-top"], baseFontHalfPoints);
400
- const after = parseCssLengthToTwips(css["margin-bottom"], baseFontHalfPoints);
514
+ const hasInlineBefore = inlineCss["margin-top"] != null;
515
+ const hasInlineAfter = inlineCss["margin-bottom"] != null;
516
+ const beforeToken = inlineCss["margin-top"] ?? (pStyleId ? void 0 : defaultCss?.["margin-top"]);
517
+ const afterToken = inlineCss["margin-bottom"] ?? (pStyleId ? void 0 : defaultCss?.["margin-bottom"]);
518
+ let before = parseCssLengthToTwips(beforeToken, baseFontHalfPoints);
519
+ let after = parseCssLengthToTwips(afterToken, baseFontHalfPoints);
520
+ if (tag === "p" && !hasInlineBefore && typeof before === "number") before = Math.min(before, 160);
521
+ if (tag === "p" && !hasInlineAfter && typeof after === "number") after = Math.min(after, 160);
401
522
  const lineHeight = (() => {
402
- const lh = css["line-height"]?.trim().toLowerCase();
523
+ const lhToken = inlineCss["line-height"] ?? (pStyleId ? void 0 : defaultCss?.["line-height"]);
524
+ const lh = lhToken?.trim().toLowerCase();
403
525
  if (!lh || lh === "normal") return void 0;
404
526
  const unitless = lh.match(/^(\d+(?:\.\d+)?)$/);
405
527
  if (unitless) {
@@ -414,8 +536,16 @@ function buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId) {
414
536
  })();
415
537
  if (typeof before === "number" || typeof after === "number" || typeof lineHeight === "number") {
416
538
  const attrs = [];
417
- if (typeof before === "number") attrs.push(`w:before="${Math.max(0, before)}"`);
418
- if (typeof after === "number") attrs.push(`w:after="${Math.max(0, after)}"`);
539
+ if (typeof before === "number") {
540
+ attrs.push(`w:before="${Math.max(0, before)}"`);
541
+ } else if (typeof lineHeight === "number") {
542
+ attrs.push('w:before="0"');
543
+ }
544
+ if (typeof after === "number") {
545
+ attrs.push(`w:after="${Math.max(0, after)}"`);
546
+ } else if (typeof lineHeight === "number") {
547
+ attrs.push('w:after="160"');
548
+ }
419
549
  if (typeof lineHeight === "number") {
420
550
  attrs.push(`w:line="${lineHeight}"`, 'w:lineRule="exact"');
421
551
  }
@@ -424,16 +554,19 @@ function buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId) {
424
554
  if (!parts.length) return "";
425
555
  return `<w:pPr>${parts.join("")}</w:pPr>`;
426
556
  }
427
- function buildParagraphXmlFromContainer(node, baseStyle, extraInd, pStyleId, result) {
428
- const containerStyle = mergeTextStyle(baseStyle, styleFromElement(node));
557
+ function buildParagraphXmlFromContainer(node, baseStyle, extraInd, pStyleId, result, ctx) {
558
+ const seededBaseStyle = ctx ? { fontSizeHalfPoints: ctx.defaultBaseFontHalfPoints } : {};
559
+ const containerStyle = mergeTextStyle(mergeTextStyle(seededBaseStyle, baseStyle), styleFromElement(node));
429
560
  const baseFontHalfPoints = containerStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? 28;
430
- const pPrXml = buildParagraphPrXml(node, baseFontHalfPoints, extraInd, pStyleId);
561
+ const computedBaseFontHalfPoints = containerStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? ctx?.defaultBaseFontHalfPoints ?? 28;
562
+ const defaultCss = ctx ? node.type === "tag" && node.name?.toLowerCase() === "p" ? { ...ctx.defaultBodyCss, ...ctx.defaultPCss } : ctx.defaultBodyCss : void 0;
563
+ const pPrXml = buildParagraphPrXml(node, computedBaseFontHalfPoints, extraInd, pStyleId, defaultCss);
431
564
  const runs = [];
432
565
  const res = result ?? {
433
566
  bodyXml: "",
434
567
  images: []
435
568
  };
436
- for (const c of node.children ?? []) collectInlineRuns(c, containerStyle, runs, res);
569
+ for (const c of node.children ?? []) collectInlineRuns(c, containerStyle, runs, res, ctx);
437
570
  const rXml = [];
438
571
  for (const token of runs) {
439
572
  if (token.kind === "br") {
@@ -453,6 +586,11 @@ function buildParagraphXmlFromContainer(node, baseStyle, extraInd, pStyleId, res
453
586
  return `<w:p>${pPrXml}${rXml.join("")}</w:p>`;
454
587
  }
455
588
  var PAGE_BREAK_XML = '<w:p><w:r><w:br w:type="page"/></w:r></w:p>';
589
/**
 * Builds an (almost) zero-height WordprocessingML paragraph whose only
 * purpose is to add vertical space via `w:after`.
 *
 * The paragraph contains a single empty run with the smallest font size and
 * an exact line height of 1, so its height comes from the `w:after` value.
 *
 * @param {number} afterTwips - Requested spacing; rounded to the nearest
 *   integer and clamped at 0.
 * @returns {string} The `<w:p>` XML, or an empty string when the requested
 *   spacing rounds to 0 or is not a finite number.
 */
function buildSpacerParagraphXml(afterTwips) {
  const requested = Number(afterTwips);
  // NaN/Infinity would otherwise be serialised verbatim into the attribute
  // (e.g. w:after="Infinity"), which is not a valid measurement.
  if (!Number.isFinite(requested)) return "";
  const after = Math.max(0, Math.round(requested));
  if (!after) return "";
  return `<w:p><w:pPr><w:spacing w:before="0" w:after="${after}" w:line="1" w:lineRule="exact"/></w:pPr><w:r><w:rPr><w:sz w:val="1"/><w:szCs w:val="1"/></w:rPr><w:t></w:t></w:r></w:p>`;
}
456
594
  function isExplicitPageBreak(node) {
457
595
  if (node.type !== "tag") return false;
458
596
  const tag = node.name?.toLowerCase();
@@ -461,13 +599,14 @@ function isExplicitPageBreak(node) {
461
599
  const classList = cls ? cls.split(/\s+/) : [];
462
600
  if (tag === "hr" && classList.includes("page-break")) return true;
463
601
  if (classList.includes("page-break")) return true;
602
+ if (classList.includes("umo-page-break")) return true;
464
603
  if (node.attribs?.["data-page-break"] === "true") return true;
465
604
  const after = css["page-break-after"]?.toLowerCase() ?? css["break-after"]?.toLowerCase();
466
605
  const before = css["page-break-before"]?.toLowerCase() ?? css["break-before"]?.toLowerCase();
467
606
  if (after?.includes("always") || before?.includes("always")) return true;
468
607
  return false;
469
608
  }
470
- function buildListBlocks(listNode, ordered, level, result) {
609
+ function buildListBlocks(listNode, ordered, level, result, ctx) {
471
610
  const liNodes = (listNode.children ?? []).filter(
472
611
  (c) => c.type === "tag" && c.name?.toLowerCase() === "li"
473
612
  );
@@ -489,7 +628,7 @@ function buildListBlocks(listNode, ordered, level, result) {
489
628
  continue;
490
629
  }
491
630
  }
492
- collectInlineRuns(c, baseStyle, runs, result);
631
+ collectInlineRuns(c, baseStyle, runs, result, ctx);
493
632
  }
494
633
  const rXml = [];
495
634
  for (const token of runs) {
@@ -507,12 +646,13 @@ function buildListBlocks(listNode, ordered, level, result) {
507
646
  rXml.push(buildRunXml(token.style, text));
508
647
  }
509
648
  if (rXml.length) {
510
- const baseFontHalfPoints = inferFirstFontSizeHalfPoints(li) ?? 28;
649
+ const baseFontHalfPoints = inferFirstFontSizeHalfPoints(li) ?? ctx?.defaultBaseFontHalfPoints ?? 28;
511
650
  const pPrXml = buildParagraphPrXml(
512
651
  li,
513
652
  baseFontHalfPoints,
514
653
  { leftTwips, hangingTwips },
515
- void 0
654
+ void 0,
655
+ ctx?.defaultBodyCss
516
656
  );
517
657
  const numPrXml = `<w:numPr><w:ilvl w:val="${ilvl}"/><w:numId w:val="${numId}"/></w:numPr>`;
518
658
  const mergedPPrXml = pPrXml ? pPrXml.replace("<w:pPr>", `<w:pPr>${numPrXml}`) : `<w:pPr>${numPrXml}<w:ind w:left="${leftTwips}" w:hanging="${hangingTwips}"/></w:pPr>`;
@@ -520,7 +660,7 @@ function buildListBlocks(listNode, ordered, level, result) {
520
660
  }
521
661
  for (const nested of nestedLists) {
522
662
  const nestedOrdered = nested.name?.toLowerCase() === "ol";
523
- out.push(...buildListBlocks(nested, nestedOrdered, ilvl + 1, result));
663
+ out.push(...buildListBlocks(nested, nestedOrdered, ilvl + 1, result, ctx));
524
664
  }
525
665
  }
526
666
  return out;
@@ -599,7 +739,7 @@ function injectTableCellParagraphSpacing(pXml) {
599
739
  }
600
740
  return pXml.replace("<w:p>", `<w:p><w:pPr>${spacingXml}</w:pPr>`);
601
741
  }
602
- function buildTableCellBlocksXml(cell, baseStyle, result) {
742
+ function buildTableCellBlocksXml(cell, baseStyle, result, ctx) {
603
743
  const children = cell.children ?? [];
604
744
  const hasBlocks = children.some((c) => {
605
745
  if (c.type !== "tag") return false;
@@ -608,7 +748,7 @@ function buildTableCellBlocksXml(cell, baseStyle, result) {
608
748
  });
609
749
  const out = [];
610
750
  if (!hasBlocks) {
611
- const p = buildParagraphXmlFromContainer(cell, baseStyle, void 0, void 0, result);
751
+ const p = buildParagraphXmlFromContainer(cell, baseStyle, void 0, void 0, result, ctx);
612
752
  if (p) out.push(p);
613
753
  return out.length ? out.map(injectTableCellParagraphSpacing).join("") : "<w:p/>";
614
754
  }
@@ -616,18 +756,18 @@ function buildTableCellBlocksXml(cell, baseStyle, result) {
616
756
  if (c.type === "tag") {
617
757
  const tag = c.name?.toLowerCase();
618
758
  if (tag === "p") {
619
- const p = buildParagraphXmlFromContainer(c, baseStyle, void 0, void 0, result);
759
+ const p = buildParagraphXmlFromContainer(c, baseStyle, void 0, void 0, result, ctx);
620
760
  if (p) out.push(p);
621
761
  continue;
622
762
  }
623
763
  if (tag && /^h[1-6]$/.test(tag)) {
624
764
  const level = Number(tag.slice(1));
625
- const p = buildParagraphXmlFromContainer(c, baseStyle, void 0, `Heading${level}`, result);
765
+ const p = buildParagraphXmlFromContainer(c, baseStyle, void 0, `Heading${level}`, result, ctx);
626
766
  if (p) out.push(p);
627
767
  continue;
628
768
  }
629
769
  if (tag === "ul" || tag === "ol") {
630
- out.push(...buildListBlocks(c, tag === "ol", 0, result));
770
+ out.push(...buildListBlocks(c, tag === "ol", 0, result, ctx));
631
771
  continue;
632
772
  }
633
773
  if (tag === "img" || tag === "canvas") {
@@ -640,7 +780,7 @@ function buildTableCellBlocksXml(cell, baseStyle, result) {
640
780
  if (!out.length) return "<w:p/>";
641
781
  return out.map(injectTableCellParagraphSpacing).join("");
642
782
  }
643
- function buildTableXml(tableNode, result) {
783
+ function buildTableXml(tableNode, result, ctx) {
644
784
  const rows = [];
645
785
  const stack = [...tableNode.children ?? []];
646
786
  while (stack.length) {
@@ -652,9 +792,19 @@ function buildTableXml(tableNode, result) {
652
792
  (tr) => (tr.children ?? []).filter((c) => c.type === "tag" && (c.name === "td" || c.name === "th"))
653
793
  );
654
794
  const colCount = Math.max(0, ...rowCells.map((cells) => cells.length));
655
- const maxTableWidthTwips = 9360;
795
+ const maxTableWidthTwips = ctx?.maxTableWidthTwips ?? 9360;
796
+ const colGroup = (tableNode.children ?? []).find((c) => c.type === "tag" && c.name === "colgroup");
797
+ const colWidthsFromGroup = [];
798
+ if (colGroup) {
799
+ const cols = (colGroup.children ?? []).filter((c) => c.type === "tag" && c.name === "col");
800
+ for (const col of cols) {
801
+ const css = parseStyleAttribute(col.attribs?.style);
802
+ const w = parseCssLengthToTwips(css.width ?? css["min-width"], 28);
803
+ colWidthsFromGroup.push(w);
804
+ }
805
+ }
656
806
  const estimatedColWidths = new Array(colCount).fill(0).map((_, i) => {
657
- let explicit;
807
+ let explicit = colWidthsFromGroup[i];
658
808
  let estimated = 0;
659
809
  for (const cells of rowCells) {
660
810
  const cell = cells[i];
@@ -667,7 +817,7 @@ function buildTableXml(tableNode, result) {
667
817
  const wTwips = estimateTextWidthTwips(text, baseFontHalfPoints) + 240;
668
818
  estimated = Math.max(estimated, wTwips);
669
819
  }
670
- const base = typeof explicit === "number" ? explicit : estimated || Math.round(maxTableWidthTwips / Math.max(1, colCount));
820
+ const base = typeof explicit === "number" ? Math.max(explicit, estimated) : estimated || Math.round(maxTableWidthTwips / Math.max(1, colCount));
671
821
  return Math.max(720, Math.min(6e3, Math.round(base)));
672
822
  });
673
823
  const normalizedColWidths = (() => {
@@ -692,7 +842,7 @@ function buildTableXml(tableNode, result) {
692
842
  const cell = cells[i];
693
843
  const isHeader = cell.name === "th";
694
844
  const baseStyle = isHeader ? { bold: true } : {};
695
- const paragraphs = buildTableCellBlocksXml(cell, baseStyle, result);
845
+ const paragraphs = buildTableCellBlocksXml(cell, baseStyle, result, ctx);
696
846
  const css = parseStyleAttribute(cell.attribs?.style);
697
847
  const widthTwips = parseCellWidthTwips(cell) ?? normalizedColWidths[i];
698
848
  const tcW = typeof widthTwips === "number" ? `<w:tcW w:w="${widthTwips}" w:type="dxa"/>` : `<w:tcW w:w="0" w:type="auto"/>`;
@@ -781,14 +931,14 @@ function buildTableXml(tableNode, result) {
781
931
  const tblPr = `<w:tblPr>${tblW}<w:tblLayout w:type="fixed"/>${tblAlign}${tblBorder}</w:tblPr>`;
782
932
  return `<w:tbl>${tblPr}${tblGrid}${rowXml.join("")}</w:tbl>`;
783
933
  }
784
- function buildParagraphXmlFromSingleInlineNode(node, baseStyle, result) {
934
+ function buildParagraphXmlFromSingleInlineNode(node, baseStyle, result, ctx) {
785
935
  const wrapper = {
786
936
  type: "tag",
787
937
  name: "p",
788
938
  attribs: { style: "text-align: center;" },
789
939
  children: [node]
790
940
  };
791
- return buildParagraphXmlFromContainer(wrapper, baseStyle, void 0, void 0, result);
941
+ return buildParagraphXmlFromContainer(wrapper, baseStyle, void 0, void 0, result, ctx);
792
942
  }
793
943
  function isRecognizedBlockTag(tag) {
794
944
  if (!tag) return false;
@@ -800,7 +950,22 @@ function isRecognizedBlockTag(tag) {
800
950
  if (tag === "pre") return true;
801
951
  return false;
802
952
  }
803
- function collectDivBlocks(node, out, result) {
953
/**
 * Tells whether `root` or any node beneath it is a tag that is either an
 * explicit page break or a recognized block-level tag.
 *
 * The tree is walked iteratively (depth-first, children in document order)
 * and the search stops at the first match.
 *
 * @param {Object} root - Parsed DOM node to inspect.
 * @returns {boolean} True when a matching tag exists in the subtree.
 */
function subtreeHasRecognizedBlocks(root) {
  const pending = [root];
  while (pending.length) {
    const current = pending.pop();
    const isBlockLike =
      current.type === "tag" &&
      (isExplicitPageBreak(current) || isRecognizedBlockTag(current.name?.toLowerCase()));
    if (isBlockLike) return true;
    const kids = current.children ?? [];
    // Push last-to-first so that pop() visits children in document order.
    for (let idx = kids.length; idx-- > 0; ) {
      pending.push(kids[idx]);
    }
  }
  return false;
}
968
+ function collectDivBlocks(node, out, result, ctx) {
804
969
  const parentStyle = node.attribs?.style;
805
970
  const inlineBuffer = [];
806
971
  const flushInline = () => {
@@ -810,7 +975,7 @@ function collectDivBlocks(node, out, result) {
810
975
  attribs: { style: parentStyle },
811
976
  children: inlineBuffer.splice(0)
812
977
  };
813
- const pXml = buildParagraphXmlFromContainer(wrapper, {}, void 0, void 0, result);
978
+ const pXml = buildParagraphXmlFromContainer(wrapper, {}, void 0, void 0, result, ctx);
814
979
  if (pXml) out.push(pXml);
815
980
  };
816
981
  const children = node.children ?? [];
@@ -824,19 +989,15 @@ function collectDivBlocks(node, out, result) {
824
989
  }
825
990
  if (isRecognizedBlockTag(tag)) {
826
991
  if (inlineBuffer.length) flushInline();
827
- collectBodyBlocks(child, out, result);
992
+ collectBodyBlocks(child, out, result, ctx);
993
+ continue;
994
+ }
995
+ if (subtreeHasRecognizedBlocks(child)) {
996
+ if (inlineBuffer.length) flushInline();
997
+ collectBodyBlocks(child, out, result, ctx);
828
998
  continue;
829
999
  }
830
1000
  if (tag === "div") {
831
- const childHasRecognizedBlocks = (child.children ?? []).some((gc) => {
832
- if (gc.type !== "tag") return false;
833
- return isRecognizedBlockTag(gc.name?.toLowerCase());
834
- });
835
- if (childHasRecognizedBlocks) {
836
- if (inlineBuffer.length) flushInline();
837
- collectBodyBlocks(child, out, result);
838
- continue;
839
- }
840
1001
  if (inlineBuffer.length) flushInline();
841
1002
  const mergedStyle = [parentStyle, child.attribs?.style].filter(Boolean).join(";");
842
1003
  const wrapper = {
@@ -845,7 +1006,7 @@ function collectDivBlocks(node, out, result) {
845
1006
  attribs: { style: mergedStyle || void 0 },
846
1007
  children: child.children ?? []
847
1008
  };
848
- const pXml = buildParagraphXmlFromContainer(wrapper, {}, void 0, void 0, result);
1009
+ const pXml = buildParagraphXmlFromContainer(wrapper, {}, void 0, void 0, result, ctx);
849
1010
  if (pXml) out.push(pXml);
850
1011
  continue;
851
1012
  }
@@ -854,7 +1015,7 @@ function collectDivBlocks(node, out, result) {
854
1015
  }
855
1016
  if (inlineBuffer.length) flushInline();
856
1017
  }
857
- function collectBodyBlocks(node, out, result) {
1018
+ function collectBodyBlocks(node, out, result, ctx) {
858
1019
  if (isSkippableSubtree(node)) return;
859
1020
  if (node.type === "tag") {
860
1021
  const tag = node.name?.toLowerCase();
@@ -863,36 +1024,51 @@ function collectBodyBlocks(node, out, result) {
863
1024
  return;
864
1025
  }
865
1026
  if (tag === "p") {
866
- const pXml = buildParagraphXmlFromContainer(node, {}, void 0, void 0, result);
1027
+ const pXml = buildParagraphXmlFromContainer(node, {}, void 0, void 0, result, ctx);
867
1028
  if (pXml) out.push(pXml);
868
1029
  return;
869
1030
  }
870
1031
  if (tag === "img" || tag === "canvas") {
871
- const pXml = buildParagraphXmlFromSingleInlineNode(node, {}, result);
1032
+ const pXml = buildParagraphXmlFromSingleInlineNode(node, {}, result, ctx);
872
1033
  if (pXml) out.push(pXml);
873
1034
  return;
874
1035
  }
875
1036
  if (tag && /^h[1-6]$/.test(tag)) {
876
1037
  const level = Number(tag.slice(1));
877
- const hXml = buildParagraphXmlFromContainer(node, {}, void 0, `Heading${level}`, result);
1038
+ const hXml = buildParagraphXmlFromContainer(node, {}, void 0, `Heading${level}`, result, ctx);
878
1039
  if (hXml) out.push(hXml);
879
1040
  return;
880
1041
  }
881
1042
  if (tag === "table") {
882
- const tblXml = buildTableXml(node, result);
1043
+ const tblXml = buildTableXml(node, result, ctx);
883
1044
  if (tblXml) out.push(tblXml);
884
1045
  return;
885
1046
  }
886
1047
  if (tag === "ul" || tag === "ol") {
887
- out.push(...buildListBlocks(node, tag === "ol", 0, result));
1048
+ out.push(...buildListBlocks(node, tag === "ol", 0, result, ctx));
888
1049
  return;
889
1050
  }
890
1051
  if (tag === "div") {
891
- collectDivBlocks(node, out, result);
1052
+ if (hasClass(node, "tableWrapper")) {
1053
+ const display = ctx.tableWrapperCss.display?.trim().toLowerCase();
1054
+ if (display !== "contents") {
1055
+ const baseFontHalfPoints = inferFirstFontSizeHalfPoints(node) ?? ctx.defaultBaseFontHalfPoints;
1056
+ const { beforeTwips, afterTwips } = extractMarginBeforeAfterTwips(ctx.tableWrapperCss, baseFontHalfPoints);
1057
+ const beforeXml = typeof beforeTwips === "number" && beforeTwips > 0 ? buildSpacerParagraphXml(beforeTwips) : "";
1058
+ const afterXml = typeof afterTwips === "number" && afterTwips > 0 ? buildSpacerParagraphXml(afterTwips) : "";
1059
+ if (beforeXml) out.push(beforeXml);
1060
+ collectDivBlocks(node, out, result, ctx);
1061
+ if (afterXml) out.push(afterXml);
1062
+ return;
1063
+ }
1064
+ collectDivBlocks(node, out, result, ctx);
1065
+ return;
1066
+ }
1067
+ collectDivBlocks(node, out, result, ctx);
892
1068
  return;
893
1069
  }
894
1070
  }
895
- for (const c of node.children ?? []) collectBodyBlocks(c, out, result);
1071
+ for (const c of node.children ?? []) collectBodyBlocks(c, out, result, ctx);
896
1072
  }
897
1073
  function textToWordBodyXml(text) {
898
1074
  const normalized = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
@@ -910,8 +1086,22 @@ function textToWordBodyXml(text) {
910
1086
  }
911
1087
  return out.join("");
912
1088
  }
913
- function htmlToWordBody(html) {
1089
+ function htmlToWordBody(html, layout) {
914
1090
  const normalized = html.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
1091
+ const bodyCss = parseCssRuleFromHtmlFirst(normalized, "body");
1092
+ const pCss = parseCssRuleFromHtmlFirst(normalized, "p");
1093
+ const defaultBaseFontHalfPoints = parseFontSizeToHalfPoints(bodyCss["font-size"]) ?? 28;
1094
+ const defaultBodyCss = {};
1095
+ if (bodyCss["line-height"]) defaultBodyCss["line-height"] = bodyCss["line-height"];
1096
+ const defaultPCss = {};
1097
+ if (pCss["line-height"]) defaultPCss["line-height"] = pCss["line-height"];
1098
+ if (pCss["margin-top"]) defaultPCss["margin-top"] = pCss["margin-top"];
1099
+ if (pCss["margin-bottom"]) defaultPCss["margin-bottom"] = pCss["margin-bottom"];
1100
+ if (pCss["text-align"]) defaultPCss["text-align"] = pCss["text-align"];
1101
+ const tableWrapperCss = {
1102
+ ...parseCssRuleFromHtml(normalized, ".tableWrapper"),
1103
+ ...parseCssRuleFromHtml(normalized, ".tiptap .tableWrapper")
1104
+ };
915
1105
  const doc = parseDocument(normalized, {
916
1106
  lowerCaseAttributeNames: true,
917
1107
  lowerCaseTags: true,
@@ -919,12 +1109,21 @@ function htmlToWordBody(html) {
919
1109
  });
920
1110
  const result = { bodyXml: "", images: [] };
921
1111
  const out = [];
922
- collectBodyBlocks(doc, out, result);
1112
+ const ctx = {
1113
+ defaultBaseFontHalfPoints,
1114
+ defaultBodyCss,
1115
+ defaultPCss,
1116
+ tableWrapperCss,
1117
+ maxTableWidthTwips: layout?.maxTableWidthTwips ?? 9360,
1118
+ maxImageWidthPx: layout?.maxImageWidthPx ?? 624,
1119
+ maxImageHeightPx: layout?.maxImageHeightPx ?? 864
1120
+ };
1121
+ collectBodyBlocks(doc, out, result, ctx);
923
1122
  result.bodyXml = out.join("");
924
1123
  return result;
925
1124
  }
926
1125
  function htmlToWordBodyXml(html) {
927
- const { bodyXml } = htmlToWordBody(html);
1126
+ const { bodyXml } = htmlToWordBody(html, void 0);
928
1127
  if (!bodyXml) {
929
1128
  const text = getTextContent(
930
1129
  parseDocument(html, {
@@ -937,8 +1136,8 @@ function htmlToWordBodyXml(html) {
937
1136
  }
938
1137
  return bodyXml;
939
1138
  }
940
- function htmlToWordBodyWithAssets(html) {
941
- const result = htmlToWordBody(html);
1139
+ function htmlToWordBodyWithAssets(html, layout) {
1140
+ const result = htmlToWordBody(html, layout);
942
1141
  if (!result.bodyXml) {
943
1142
  const text = getTextContent(
944
1143
  parseDocument(html, {
@@ -956,4 +1155,4 @@ export {
956
1155
  htmlToWordBodyXml,
957
1156
  textToWordBodyXml
958
1157
  };
959
- //# sourceMappingURL=htmlToWordBodyXml-SIVUZ7K7.js.map
1158
+ //# sourceMappingURL=htmlToWordBodyXml-VH5U3L5J.js.map