docxmlater 10.3.6 → 10.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/README.md +158 -7
  2. package/dist/core/Document.d.ts +98 -3
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +740 -50
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentContent.d.ts.map +1 -1
  7. package/dist/core/DocumentContent.js +0 -8
  8. package/dist/core/DocumentContent.js.map +1 -1
  9. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  10. package/dist/core/DocumentGenerator.js +9 -5
  11. package/dist/core/DocumentGenerator.js.map +1 -1
  12. package/dist/core/DocumentParser.d.ts.map +1 -1
  13. package/dist/core/DocumentParser.js +617 -104
  14. package/dist/core/DocumentParser.js.map +1 -1
  15. package/dist/core/RelationshipManager.d.ts.map +1 -1
  16. package/dist/core/RelationshipManager.js +4 -3
  17. package/dist/core/RelationshipManager.js.map +1 -1
  18. package/dist/elements/Bookmark.d.ts +7 -0
  19. package/dist/elements/Bookmark.d.ts.map +1 -1
  20. package/dist/elements/Bookmark.js +24 -4
  21. package/dist/elements/Bookmark.js.map +1 -1
  22. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  23. package/dist/elements/BookmarkManager.js +4 -3
  24. package/dist/elements/BookmarkManager.js.map +1 -1
  25. package/dist/elements/CommonTypes.d.ts +2 -2
  26. package/dist/elements/CommonTypes.d.ts.map +1 -1
  27. package/dist/elements/CommonTypes.js +14 -1
  28. package/dist/elements/CommonTypes.js.map +1 -1
  29. package/dist/elements/Field.d.ts +1 -1
  30. package/dist/elements/Field.d.ts.map +1 -1
  31. package/dist/elements/Field.js +1 -1
  32. package/dist/elements/Field.js.map +1 -1
  33. package/dist/elements/Footer.d.ts +2 -0
  34. package/dist/elements/Footer.d.ts.map +1 -1
  35. package/dist/elements/Footer.js +6 -0
  36. package/dist/elements/Footer.js.map +1 -1
  37. package/dist/elements/Header.d.ts +2 -0
  38. package/dist/elements/Header.d.ts.map +1 -1
  39. package/dist/elements/Header.js +6 -0
  40. package/dist/elements/Header.js.map +1 -1
  41. package/dist/elements/Image.d.ts.map +1 -1
  42. package/dist/elements/Image.js +3 -0
  43. package/dist/elements/Image.js.map +1 -1
  44. package/dist/elements/Paragraph.d.ts +81 -1
  45. package/dist/elements/Paragraph.d.ts.map +1 -1
  46. package/dist/elements/Paragraph.js +515 -21
  47. package/dist/elements/Paragraph.js.map +1 -1
  48. package/dist/elements/Revision.d.ts +0 -1
  49. package/dist/elements/Revision.d.ts.map +1 -1
  50. package/dist/elements/Revision.js +0 -12
  51. package/dist/elements/Revision.js.map +1 -1
  52. package/dist/elements/RevisionManager.d.ts +0 -1
  53. package/dist/elements/RevisionManager.d.ts.map +1 -1
  54. package/dist/elements/RevisionManager.js +0 -2
  55. package/dist/elements/RevisionManager.js.map +1 -1
  56. package/dist/elements/Run.d.ts +16 -4
  57. package/dist/elements/Run.d.ts.map +1 -1
  58. package/dist/elements/Run.js +114 -22
  59. package/dist/elements/Run.js.map +1 -1
  60. package/dist/elements/Section.d.ts +7 -1
  61. package/dist/elements/Section.d.ts.map +1 -1
  62. package/dist/elements/Section.js +185 -4
  63. package/dist/elements/Section.js.map +1 -1
  64. package/dist/elements/Shape.js.map +1 -1
  65. package/dist/elements/Table.d.ts +30 -1
  66. package/dist/elements/Table.d.ts.map +1 -1
  67. package/dist/elements/Table.js +357 -40
  68. package/dist/elements/Table.js.map +1 -1
  69. package/dist/elements/TableCell.d.ts +3 -0
  70. package/dist/elements/TableCell.d.ts.map +1 -1
  71. package/dist/elements/TableCell.js +30 -3
  72. package/dist/elements/TableCell.js.map +1 -1
  73. package/dist/elements/TableGridChange.d.ts +0 -1
  74. package/dist/elements/TableGridChange.d.ts.map +1 -1
  75. package/dist/elements/TableGridChange.js +0 -10
  76. package/dist/elements/TableGridChange.js.map +1 -1
  77. package/dist/elements/TableRow.d.ts +4 -0
  78. package/dist/elements/TableRow.d.ts.map +1 -1
  79. package/dist/elements/TableRow.js +31 -3
  80. package/dist/elements/TableRow.js.map +1 -1
  81. package/dist/formatting/AbstractNumbering.d.ts +5 -0
  82. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  83. package/dist/formatting/AbstractNumbering.js +22 -0
  84. package/dist/formatting/AbstractNumbering.js.map +1 -1
  85. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  86. package/dist/formatting/NumberingLevel.js +3 -3
  87. package/dist/formatting/NumberingLevel.js.map +1 -1
  88. package/dist/formatting/Style.d.ts +1 -0
  89. package/dist/formatting/Style.d.ts.map +1 -1
  90. package/dist/formatting/Style.js +25 -59
  91. package/dist/formatting/Style.js.map +1 -1
  92. package/dist/formatting/StylesManager.d.ts +1 -0
  93. package/dist/formatting/StylesManager.d.ts.map +1 -1
  94. package/dist/formatting/StylesManager.js +12 -0
  95. package/dist/formatting/StylesManager.js.map +1 -1
  96. package/dist/helpers/CleanupHelper.js.map +1 -1
  97. package/dist/images/ImageOptimizer.d.ts.map +1 -1
  98. package/dist/images/ImageOptimizer.js +0 -1
  99. package/dist/images/ImageOptimizer.js.map +1 -1
  100. package/dist/index.d.ts +1 -1
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js.map +1 -1
  103. package/dist/managers/DrawingManager.d.ts.map +1 -1
  104. package/dist/managers/DrawingManager.js +4 -2
  105. package/dist/managers/DrawingManager.js.map +1 -1
  106. package/dist/types/formatting.d.ts +2 -2
  107. package/dist/types/formatting.d.ts.map +1 -1
  108. package/dist/types/formatting.js.map +1 -1
  109. package/dist/utils/ChangelogGenerator.d.ts +2 -2
  110. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  111. package/dist/utils/ChangelogGenerator.js +4 -5
  112. package/dist/utils/ChangelogGenerator.js.map +1 -1
  113. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  114. package/dist/utils/InMemoryRevisionAcceptor.js +0 -1
  115. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  116. package/dist/utils/RevisionAwareProcessor.d.ts +2 -2
  117. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  118. package/dist/utils/RevisionAwareProcessor.js +2 -2
  119. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  120. package/dist/utils/SelectiveRevisionAcceptor.d.ts +0 -2
  121. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  122. package/dist/utils/SelectiveRevisionAcceptor.js +0 -26
  123. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  124. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  125. package/dist/utils/ShadingResolver.js.map +1 -1
  126. package/dist/utils/acceptRevisions.js +1 -1
  127. package/dist/utils/acceptRevisions.js.map +1 -1
  128. package/dist/utils/stripTrackedChanges.js +1 -1
  129. package/dist/utils/stripTrackedChanges.js.map +1 -1
  130. package/dist/utils/units.d.ts.map +1 -1
  131. package/dist/utils/units.js +1 -1
  132. package/dist/utils/units.js.map +1 -1
  133. package/dist/validation/RevisionAutoFixer.d.ts +2 -1
  134. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  135. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  136. package/package.json +10 -1
  137. package/src/constants/CLAUDE.md +28 -0
  138. package/src/core/CLAUDE.md +4 -0
  139. package/src/core/Document.ts +1770 -83
  140. package/src/core/DocumentContent.ts +0 -11
  141. package/src/core/DocumentGenerator.ts +11 -12
  142. package/src/core/DocumentParser.ts +654 -141
  143. package/src/core/RelationshipManager.ts +6 -3
  144. package/src/elements/Bookmark.ts +39 -4
  145. package/src/elements/BookmarkManager.ts +4 -3
  146. package/src/elements/CLAUDE.md +18 -2
  147. package/src/elements/CommonTypes.ts +35 -8
  148. package/src/elements/Field.ts +1 -1
  149. package/src/elements/Footer.ts +23 -0
  150. package/src/elements/Header.ts +25 -0
  151. package/src/elements/Image.ts +5 -0
  152. package/src/elements/Paragraph.ts +1069 -41
  153. package/src/elements/Revision.ts +0 -19
  154. package/src/elements/RevisionManager.ts +1 -3
  155. package/src/elements/Run.ts +265 -35
  156. package/src/elements/Section.ts +214 -8
  157. package/src/elements/Shape.ts +1 -1
  158. package/src/elements/Table.ts +850 -61
  159. package/src/elements/TableCell.ts +84 -10
  160. package/src/elements/TableGridChange.ts +2 -16
  161. package/src/elements/TableRow.ts +94 -9
  162. package/src/formatting/AbstractNumbering.ts +42 -1
  163. package/src/formatting/CLAUDE.md +4 -0
  164. package/src/formatting/NumberingLevel.ts +11 -7
  165. package/src/formatting/Style.ts +39 -71
  166. package/src/formatting/StylesManager.ts +36 -0
  167. package/src/helpers/CleanupHelper.ts +1 -1
  168. package/src/images/ImageOptimizer.ts +0 -3
  169. package/src/index.ts +1 -1
  170. package/src/managers/DrawingManager.ts +5 -3
  171. package/src/tracking/CLAUDE.md +30 -0
  172. package/src/types/CLAUDE.md +39 -0
  173. package/src/types/formatting.ts +2 -2
  174. package/src/utils/CLAUDE.md +15 -0
  175. package/src/utils/ChangelogGenerator.ts +4 -5
  176. package/src/utils/InMemoryRevisionAcceptor.ts +0 -9
  177. package/src/utils/RevisionAwareProcessor.ts +2 -3
  178. package/src/utils/SelectiveRevisionAcceptor.ts +0 -39
  179. package/src/utils/ShadingResolver.ts +0 -1
  180. package/src/utils/acceptRevisions.ts +1 -1
  181. package/src/utils/stripTrackedChanges.ts +1 -1
  182. package/src/utils/units.ts +2 -1
  183. package/src/validation/CLAUDE.md +40 -0
  184. package/src/validation/RevisionAutoFixer.ts +2 -1
@@ -3,13 +3,10 @@
3
3
  * Provides a simple interface for creating DOCX files without managing ZIP and XML manually
4
4
  */
5
5
 
6
- import { AlternateContent } from '../elements/AlternateContent';
7
6
  import { Bookmark } from '../elements/Bookmark';
8
7
  import { BookmarkManager } from '../elements/BookmarkManager';
9
8
  import { Comment } from '../elements/Comment';
10
- import { CustomXmlBlock } from '../elements/CustomXml';
11
9
  import { PreservedElement } from '../elements/PreservedElement';
12
- import { MathParagraph } from '../elements/MathElement';
13
10
  import { CommentManager } from '../elements/CommentManager';
14
11
  import { Endnote } from '../elements/Endnote';
15
12
  import { EndnoteManager } from '../elements/EndnoteManager';
@@ -77,7 +74,7 @@ function getLogger(): ILogger {
77
74
  // cleanupRevisionMetadata - cleanup metadata files after in-memory acceptance
78
75
  import { acceptAllRevisions, cleanupRevisionMetadata } from '../utils/acceptRevisions';
79
76
  // In-memory revision acceptance - used AFTER parsing, allows subsequent modifications
80
- import { acceptRevisionsInMemory, AcceptRevisionsResult } from '../utils/InMemoryRevisionAcceptor';
77
+ import { acceptRevisionsInMemory } from '../utils/InMemoryRevisionAcceptor';
81
78
  import { stripTrackedChanges } from '../utils/stripTrackedChanges';
82
79
  import { diffText, diffHasUnchangedParts } from '../utils/textDiff';
83
80
  import { XMLBuilder } from '../xml/XMLBuilder';
@@ -333,9 +330,6 @@ export class Document {
333
330
  // TOC auto-population setting
334
331
  private autoPopulateTOCs = false;
335
332
 
336
- // TOC field instruction sync setting (default: OFF to preserve original instructions)
337
- private autoSyncTOCStyles = false;
338
-
339
333
  // Flag to skip document.xml regeneration after stripping tracked changes
340
334
  // When true, save() and toBuffer() will preserve the manually cleaned XML
341
335
  private skipDocumentXmlRegeneration = false;
@@ -558,6 +552,211 @@ export class Document {
558
552
  return doc;
559
553
  }
560
554
 
555
+ /**
556
+ * Creates a Document from Markdown text
557
+ *
558
+ * Parses common Markdown syntax and builds a DOCX document. Supports:
559
+ * - Headings (`#` through `######`)
560
+ * - Bold (`**text**`), italic (`*text*`), bold+italic (`***text***`)
561
+ * - Strikethrough (`~~text~~`)
562
+ * - Inline code (`` `code` ``) rendered in Courier New
563
+ * - Links (`[text](url)`)
564
+ * - Bullet lists (`- `, `* ` or `+ `)
565
+ * - Numbered lists (`1. ` or `1) `)
566
+ * - Tables (`| col | col |` with `| --- |` separator)
567
+ * - Horizontal rules (`---`, `***`, `___`)
568
+ * - Blank lines as paragraph separators
569
+ *
570
+ * @param markdown - Markdown text to convert
571
+ * @param options - Optional document options
572
+ * @returns New Document populated with the parsed content
573
+ *
574
+ * @example
575
+ * ```typescript
576
+ * const doc = Document.fromMarkdown(`
577
+ * # Report Title
578
+ *
579
+ * This is the **introduction** with *emphasis*.
580
+ *
581
+ * ## Data
582
+ *
583
+ * | Name | Value |
584
+ * | --- | --- |
585
+ * | Alpha | 100 |
586
+ *
587
+ * - First item
588
+ * - Second item
589
+ * `);
590
+ * await doc.save('output.docx');
591
+ * ```
592
+ */
593
static fromMarkdown(markdown: string, options?: DocumentOptions): Document {
  // Block-level patterns, declared once instead of being re-created for every line.
  const horizontalRulePattern = /^\s{0,3}([-]{3,}|[*]{3,}|[_]{3,})\s*$/;
  const headingPattern = /^(#{1,6})\s+(.+)$/;
  const bulletPattern = /^(\s*)[-*+]\s+(.+)$/;
  const numberedPattern = /^(\s*)\d+[.)]\s+(.+)$/;
  // Looser "starts like a list item" checks used only to end a running paragraph.
  const bulletStartPattern = /^(\s*)[-*+]\s+/;
  const numberedStartPattern = /^(\s*)\d+[.)]\s+/;

  const doc = Document.create(options);

  // Split on CRLF, bare CR and LF. Splitting on '\n' alone leaves a trailing
  // '\r' on every line of Windows-style input; because `.` never matches '\r'
  // and `$` only matches at the very end, the heading and list patterns above
  // would then silently fail and the markup would be emitted as plain text.
  const lines = markdown.split(/\r\n|\r|\n/);

  let i = 0;
  while (i < lines.length) {
    const line = lines[i]!;

    // Blank lines only separate blocks; they produce no output.
    if (line.trim() === '') {
      i++;
      continue;
    }

    // Horizontal rule: ---, ***, ___ (3+ of the same char, optional spaces).
    // Checked before lists so that `***` is not read as a bullet marker.
    if (horizontalRulePattern.test(line)) {
      doc.addHorizontalRule();
      i++;
      continue;
    }

    // Heading: the paragraph is created empty and the text is added through
    // the inline parser so that emphasis/links inside headings are honoured.
    const headingMatch = headingPattern.exec(line);
    if (headingMatch) {
      const level = headingMatch[1]!.length as 1 | 2 | 3 | 4 | 5 | 6;
      const text = headingMatch[2]!;
      const para = doc.addHeading('', level);
      Document.applyInlineMarkdown(para, text);
      i++;
      continue;
    }

    // Table: consume every consecutive line that starts with '|'.
    if (line.trimStart().startsWith('|')) {
      const tableLines: string[] = [];
      while (i < lines.length && lines[i]!.trimStart().startsWith('|')) {
        tableLines.push(lines[i]!);
        i++;
      }
      const table = Document.parseMarkdownTable(tableLines);
      if (table) {
        doc.addTable(table);
      }
      continue;
    }

    // Bullet list item
    const bulletMatch = bulletPattern.exec(line);
    if (bulletMatch) {
      const text = bulletMatch[2]!;
      const para = doc.createParagraph();
      Document.applyInlineMarkdown(para, text);
      para.setStyle('ListBullet');
      i++;
      continue;
    }

    // Numbered list item
    const numberMatch = numberedPattern.exec(line);
    if (numberMatch) {
      const text = numberMatch[2]!;
      const para = doc.createParagraph();
      Document.applyInlineMarkdown(para, text);
      para.setStyle('ListNumber');
      i++;
      continue;
    }

    // Regular paragraph: the current line is always consumed (which guarantees
    // progress), then following lines are absorbed until a blank line or the
    // start of another block construct.
    const paraLines: string[] = [line];
    i++;
    while (
      i < lines.length &&
      lines[i]!.trim() !== '' &&
      !lines[i]!.trim().startsWith('#') &&
      !lines[i]!.trim().startsWith('|') &&
      !horizontalRulePattern.test(lines[i]!) &&
      !bulletStartPattern.test(lines[i]!) &&
      !numberedStartPattern.test(lines[i]!)
    ) {
      paraLines.push(lines[i]!);
      i++;
    }

    // Soft line breaks inside a paragraph collapse to single spaces.
    const para = doc.createParagraph();
    Document.applyInlineMarkdown(para, paraLines.join(' '));
  }

  return doc;
}
683
+
684
+ /**
685
+ * Parses inline Markdown formatting and adds runs to a paragraph.
686
+ * Handles bold, italic, strikethrough, inline code, and links.
687
+ * @internal
688
+ */
689
private static applyInlineMarkdown(para: Paragraph, text: string): void {
  // One alternation per inline construct; longer emphasis markers come first
  // so that `***x***` is not consumed as `**` + `*`.
  const inlinePattern =
    /(\*\*\*(.+?)\*\*\*|\*\*(.+?)\*\*|\*(.+?)\*|~~(.+?)~~|`([^`]+)`|\[([^\]]+)\]\(([^)]+)\))/g;

  // Offset of the first character not yet emitted to the paragraph.
  let cursor = 0;

  for (
    let match = inlinePattern.exec(text);
    match !== null;
    match = inlinePattern.exec(text)
  ) {
    const start = match.index;

    // Emit the literal text sitting between the previous match and this one.
    if (start > cursor) {
      para.addText(text.slice(cursor, start));
    }

    const boldItalic = match[2];
    const bold = match[3];
    const italic = match[4];
    const strike = match[5];
    const code = match[6];
    const linkText = match[7];
    const linkUrl = match[8];

    if (boldItalic !== undefined) {
      // ***bold+italic***
      para.addText(boldItalic, { bold: true, italic: true });
    } else if (bold !== undefined) {
      // **bold**
      para.addText(bold, { bold: true });
    } else if (italic !== undefined) {
      // *italic*
      para.addText(italic, { italic: true });
    } else if (strike !== undefined) {
      // ~~strikethrough~~
      para.addText(strike, { strike: true });
    } else if (code !== undefined) {
      // `inline code`
      para.addText(code, { font: 'Courier New' });
    } else if (linkText !== undefined && linkUrl !== undefined) {
      // [text](url)
      para.addHyperlink(new Hyperlink({ url: linkUrl, text: linkText }));
    }

    cursor = start + match[0]!.length;
  }

  // Whatever follows the last match is plain text.
  if (cursor < text.length) {
    para.addText(text.slice(cursor));
  }
}
731
+
732
+ /**
733
+ * Parses Markdown table lines into a Table.
734
+ * @internal
735
+ */
736
private static parseMarkdownTable(lines: string[]): Table | null {
  // A table needs at least two lines; anything shorter is rejected.
  if (lines.length < 2) return null;

  // Splits one table line into trimmed cell texts.
  // The line is trimmed BEFORE the outer pipes are stripped: callers accept
  // indented table lines, and without the trim a leading space (or a trailing
  // space / '\r') keeps the anchored pipe patterns from matching, which adds a
  // phantom empty cell and prevents the separator row from being recognised.
  const parseRow = (line: string): string[] =>
    line
      .trim()
      .replace(/^\|/, '')
      .replace(/\|$/, '')
      .split('|')
      .map((cell) => cell.trim());

  // A separator row consists solely of dash cells with optional alignment colons.
  const isSeparatorRow = (cells: string[]): boolean =>
    cells.every((c) => /^:?-+:?$/.test(c));

  const rows: string[][] = [];
  for (const line of lines) {
    const cells = parseRow(line);

    // Skip separator row (| --- | --- |)
    if (isSeparatorRow(cells)) continue;

    rows.push(cells);
  }

  if (rows.length === 0) return null;
  return Table.fromArray(rows);
}
759
+
561
760
  /**
562
761
  * Loads an existing Word document from a file path
563
762
  *
@@ -1461,6 +1660,83 @@ export class Document {
1461
1660
  return para;
1462
1661
  }
1463
1662
 
1663
+ /**
1664
+ * Creates a heading paragraph and appends it to the document
1665
+ *
1666
+ * Convenience method that creates a paragraph with the given text and
1667
+ * applies a heading style (Heading1–Heading9). Equivalent to:
1668
+ * ```typescript
1669
+ * doc.createParagraph(text).setStyle(`Heading${level}`);
1670
+ * ```
1671
+ *
1672
+ * @param text - Heading text content
1673
+ * @param level - Heading level 1–9 (default: 1)
1674
+ * @returns The created Paragraph for further customization
1675
+ *
1676
+ * @example
1677
+ * ```typescript
1678
+ * doc.addHeading('Introduction', 1);
1679
+ * doc.addHeading('Background', 2);
1680
+ * doc.addHeading('Methods', 2);
1681
+ * doc.addHeading('Data Collection', 3);
1682
+ * ```
1683
+ */
1684
addHeading(text: string, level: 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 = 1): Paragraph {
  // Style ids follow the `Heading<level>` naming used for heading styles.
  const headingStyleId = `Heading${level}`;
  const heading = this.createParagraph(text);
  return heading.setStyle(headingStyleId);
}
1687
+
1688
+ /**
1689
+ * Inserts a page break into the document
1690
+ *
1691
+ * Creates a paragraph containing a page break element and appends it
1692
+ * to the document body. This is the standard way to force a new page
1693
+ * in OOXML (a paragraph with `w:br w:type="page"`).
1694
+ *
1695
+ * @returns The created Paragraph (allows further content after the break)
1696
+ *
1697
+ * @example
1698
+ * ```typescript
1699
+ * doc.addHeading('Chapter 1', 1);
1700
+ * doc.createParagraph('Chapter 1 content...');
1701
+ * doc.addPageBreak();
1702
+ * doc.addHeading('Chapter 2', 1);
1703
+ * ```
1704
+ */
1705
addPageBreak(): Paragraph {
  // The break lives in a text-less run of its own.
  const breakRun = new Run('');
  breakRun.addBreak('page');

  // Host paragraph appended to the body; returned so callers can extend it.
  const host = this.createParagraph();
  host.addRun(breakRun);
  return host;
}
1712
+
1713
+ /**
1714
+ * Inserts a horizontal rule into the document
1715
+ *
1716
+ * Creates an empty paragraph with a bottom border that renders as a
1717
+ * horizontal line. Uses a thin single-line border, which is the standard
1718
+ * OOXML approach for horizontal rules (no dedicated HR element exists).
1719
+ *
1720
+ * @param color - Border color in hex without # (default: 'auto')
1721
+ * @param size - Border thickness in eighths of a point (default: 4, ~0.5pt)
1722
+ * @returns The created Paragraph
1723
+ *
1724
+ * @example
1725
+ * ```typescript
1726
+ * doc.createParagraph('Above the line');
1727
+ * doc.addHorizontalRule();
1728
+ * doc.createParagraph('Below the line');
1729
+ *
1730
+ * // Custom color and thickness
1731
+ * doc.addHorizontalRule('FF0000', 12);
1732
+ * ```
1733
+ */
1734
addHorizontalRule(color = 'auto', size = 4): Paragraph {
  // An empty paragraph whose bottom border is drawn as the rule.
  const rule = this.createParagraph();
  rule.setBorder({
    bottom: {
      style: 'single',
      size,
      color,
      space: 1,
    },
  });
  return rule;
}
1739
+
1464
1740
  /**
1465
1741
  * Adds an existing table to the document body
1466
1742
  *
@@ -1545,6 +1821,32 @@ export class Document {
1545
1821
  return table;
1546
1822
  }
1547
1823
 
1824
+ /**
1825
+ * Creates a table from CSV data and appends it to the document
1826
+ *
1827
+ * Parses the CSV string into a table using `Table.fromCSV()` and adds
1828
+ * it to the document body. Handles quoted fields, commas in values,
1829
+ * and other RFC 4180 features.
1830
+ *
1831
+ * @param csv - CSV string to parse
1832
+ * @param delimiter - Field delimiter (default: ',')
1833
+ * @returns The created Table
1834
+ *
1835
+ * @example
1836
+ * ```typescript
1837
+ * doc.createTableFromCSV('Name,Age\nAlice,30\nBob,25');
1838
+ *
1839
+ * // From a TSV string
1840
+ * doc.createTableFromCSV(tsvData, '\t');
1841
+ * ```
1842
+ */
1843
createTableFromCSV(csv: string, delimiter = ','): Table {
  // Parsing is delegated entirely to Table.fromCSV.
  const csvTable = Table.fromCSV(csv, delimiter);

  // Give the table this document's styles manager, then append it to the body.
  csvTable._setStylesManager(this.stylesManager);
  this.bodyElements.push(csvTable);

  return csvTable;
}
1849
+
1548
1850
  /**
1549
1851
  * Populates all TOCs in document XML
1550
1852
  * Extracted from replaceTableOfContents for reuse
@@ -2194,6 +2496,19 @@ export class Document {
2194
2496
  this.updateContentTypesWithImagesHeadersFootersAndComments();
2195
2497
  }
2196
2498
 
2499
+ /**
2500
+ * Saves the document to a file. Uses atomic write (temp file + rename) for crash safety.
2501
+ * Always call dispose() after saving when done with the document.
2502
+ *
2503
+ * @param filePath - Output file path
2504
+ * @throws {FileOperationError} If the file cannot be written
2505
+ *
2506
+ * @example
2507
+ * ```typescript
2508
+ * await doc.save('output.docx');
2509
+ * doc.dispose();
2510
+ * ```
2511
+ */
2197
2512
  async save(filePath: string): Promise<void> {
2198
2513
  const logger = getLogger();
2199
2514
  logger.info('Saving document', { path: filePath, paragraphs: this.getParagraphCount() });
@@ -2236,7 +2551,7 @@ export class Document {
2236
2551
  const { promises: fs } = await import('fs');
2237
2552
  await fs.unlink(tempPath);
2238
2553
  } catch (cleanupErr) {
2239
- logger.debug('Failed to clean up temp file', { tempPath, error: String(cleanupErr) });
2554
+ logger.warn('Failed to clean up temp file', { tempPath, error: String(cleanupErr) });
2240
2555
  }
2241
2556
  throw error; // Re-throw original error
2242
2557
  } finally {
@@ -2320,6 +2635,123 @@ export class Document {
2320
2635
  }
2321
2636
  }
2322
2637
 
2638
+ /**
2639
+ * Generates the document as a base64-encoded string
2640
+ *
2641
+ * Produces the same DOCX content as `toBuffer()` but encoded as base64.
2642
+ * Useful for embedding in JSON API responses, storing in databases as text,
2643
+ * passing through systems that don't support binary data, or constructing
2644
+ * data URIs (see `toDataUri()`).
2645
+ *
2646
+ * @returns Promise resolving to a base64-encoded string of the DOCX file
2647
+ *
2648
+ * @example
2649
+ * ```typescript
2650
+ * // JSON API response
2651
+ * const base64 = await doc.toBase64();
2652
+ * res.json({ filename: 'report.docx', content: base64 });
2653
+ *
2654
+ * // Store in text-based database field
2655
+ * await db.insert({ docBase64: await doc.toBase64() });
2656
+ * ```
2657
+ */
2658
async toBase64(): Promise<string> {
  // Same bytes as toBuffer(), re-encoded as base64 text.
  return (await this.toBuffer()).toString('base64');
}
2662
+
2663
+ /**
2664
+ * Generates the document as a data URI string
2665
+ *
2666
+ * Returns a complete `data:` URI with the DOCX MIME type and base64-encoded
2667
+ * content. Can be used directly as an `href` for download links, embedded
2668
+ * in HTML, or passed to APIs expecting data URIs.
2669
+ *
2670
+ * @returns Promise resolving to a data URI string
2671
+ *
2672
+ * @example
2673
+ * ```typescript
2674
+ * // HTML download link
2675
+ * const uri = await doc.toDataUri();
2676
+ * const html = `<a href="${uri}" download="report.docx">Download</a>`;
2677
+ *
2678
+ * // Embed in email HTML
2679
+ * const dataUri = await doc.toDataUri();
2680
+ * ```
2681
+ */
2682
async toDataUri(): Promise<string> {
  // Prefix the base64 payload with the DOCX MIME type to form a data: URI.
  return `data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,${await this.toBase64()}`;
}
2686
+
2687
+ /**
2688
+ * Loads a document from a base64-encoded string
2689
+ *
2690
+ * The inverse of `toBase64()`. Creates a Document from a base64 string,
2691
+ * useful for receiving documents from JSON APIs or text-based storage.
2692
+ *
2693
+ * @param base64 - Base64-encoded DOCX content
2694
+ * @param options - Optional document configuration
2695
+ * @returns Promise resolving to a Document instance
2696
+ *
2697
+ * @example
2698
+ * ```typescript
2699
+ * // Receive from API
2700
+ * const doc = await Document.loadFromBase64(apiResponse.content);
2701
+ * console.log(doc.toPlainText());
2702
+ * ```
2703
+ */
2704
static async loadFromBase64(base64: string, options?: DocumentOptions): Promise<Document> {
  // Decode to raw bytes and hand off to the buffer loader.
  return Document.loadFromBuffer(Buffer.from(base64, 'base64'), options);
}
2708
+
2709
+ /**
2710
+ * Creates an independent deep copy of this document
2711
+ *
2712
+ * Serializes the document to a buffer and reloads it, producing a
2713
+ * completely independent clone with its own body elements, styles,
2714
+ * numbering, images, and ZIP state. Changes to the clone do not
2715
+ * affect the original and vice versa.
2716
+ *
2717
+ * Essential for template-based batch generation: load a template
2718
+ * once, clone it N times, and fill each with different data.
2719
+ *
2720
+ * @returns Promise resolving to a new Document with identical content
2721
+ *
2722
+ * @example
2723
+ * ```typescript
2724
+ * // Template-based batch generation
2725
+ * const template = await Document.load('template.docx');
2726
+ *
2727
+ * for (const record of data) {
2728
+ * const doc = await template.clone();
2729
+ * doc.fillTemplate(record);
2730
+ * await doc.save(`output-${record.id}.docx`);
2731
+ * doc.dispose();
2732
+ * }
2733
+ *
2734
+ * template.dispose();
2735
+ * ```
2736
+ *
2737
+ * @example
2738
+ * ```typescript
2739
+ * // Fork a document for parallel modifications
2740
+ * const original = Document.create();
2741
+ * original.addHeading('Shared Title', 1);
2742
+ *
2743
+ * const version1 = await original.clone();
2744
+ * version1.createParagraph('Version 1 content');
2745
+ *
2746
+ * const version2 = await original.clone();
2747
+ * version2.createParagraph('Version 2 content');
2748
+ * ```
2749
+ */
2750
async clone(): Promise<Document> {
  // Round-trip through a serialized buffer: the copy is rebuilt from bytes
  // rather than by copying in-memory objects. No load options are passed.
  const snapshot = await this.toBuffer();
  return Document.loadFromBuffer(snapshot);
}
2754
+
2323
2755
  /**
2324
2756
  * Updates the document.xml file with current paragraphs
2325
2757
  */
@@ -2940,10 +3372,12 @@ export class Document {
2940
3372
  return divCount;
2941
3373
  }
2942
3374
 
3375
+ /** Gets the optimizeForBrowser web setting. */
2943
3376
  getOptimizeForBrowser(): boolean {
2944
3377
  return this._webSettings.optimizeForBrowser;
2945
3378
  }
2946
3379
 
3380
+ /** Sets the optimizeForBrowser web setting. */
2947
3381
  setOptimizeForBrowser(value: boolean): this {
2948
3382
  this._webSettings.optimizeForBrowser = value;
2949
3383
  this._webSettingsModified = true;
@@ -2952,10 +3386,12 @@ export class Document {
2952
3386
  return this;
2953
3387
  }
2954
3388
 
3389
+ /** Gets the allowPNG web setting. */
2955
3390
  getAllowPNG(): boolean {
2956
3391
  return this._webSettings.allowPNG;
2957
3392
  }
2958
3393
 
3394
+ /** Sets the allowPNG web setting. */
2959
3395
  setAllowPNG(value: boolean): this {
2960
3396
  this._webSettings.allowPNG = value;
2961
3397
  this._webSettingsModified = true;
@@ -4475,7 +4911,7 @@ export class Document {
4475
4911
  left: options?.cellMargins?.left ?? 115, // 0.08 inches
4476
4912
  right: options?.cellMargins?.right ?? 115, // 0.08 inches
4477
4913
  };
4478
- const skipSingleCellTables = options?.skipSingleCellTables !== false && !singleCellShading;
4914
+ // Note: skipSingleCellTables option is accepted but not yet implemented
4479
4915
 
4480
4916
  // Statistics
4481
4917
  let tablesProcessed = 0;
@@ -5069,7 +5505,7 @@ export class Document {
5069
5505
  validateNumberingReferences(): number {
5070
5506
  let fixed = 0;
5071
5507
  const existingNumIds = new Set<number>(
5072
- this.numberingManager.getAllInstances().map((i: any) => i.getNumId())
5508
+ this.numberingManager.getAllInstances().map((i) => i.getNumId())
5073
5509
  );
5074
5510
 
5075
5511
  for (const para of this.getAllParagraphs()) {
@@ -5261,6 +5697,24 @@ export class Document {
5261
5697
 
5262
5698
  let docPrId = 1;
5263
5699
 
5700
// Ids already in use (read from paragraphs or handed out below), kept so that
// freshly generated ids never collide with them.
const existingParaIds = new Set<string>();
// Paragraphs found without a paraId; they receive one after the full scan.
const paragraphsNeedingIds: Paragraph[] = [];

// Draws random ids until one is unused, records it, and returns it.
const generateUniqueParaId = (): string => {
  for (;;) {
    // 8-char uppercase hex string matching Word's w14:paraId format.
    // Per ECMA-376, ST_LongHexNumber MaxExclusive is 80000000, so the value
    // is drawn from 1..0x7FFFFFFF inclusive.
    const value = Math.floor(Math.random() * 0x7fffffff + 1);
    const candidate = value.toString(16).toUpperCase().padStart(8, '0');

    if (!existingParaIds.has(candidate)) {
      existingParaIds.add(candidate);
      return candidate;
    }
  }
};
5717
+
5264
5718
  const processParagraph = (para: Paragraph) => {
5265
5719
  // Assign unique IDs to unregistered revisions
5266
5720
  for (const rev of para.getRevisions()) {
@@ -5281,6 +5735,13 @@ export class Document {
5281
5735
  item.getImageElement().setDocPrId(docPrId++);
5282
5736
  }
5283
5737
  }
5738
+
5739
+ // Track existing paraIds and paragraphs that need new ones
5740
+ if (para.formatting.paraId) {
5741
+ existingParaIds.add(para.formatting.paraId);
5742
+ } else {
5743
+ paragraphsNeedingIds.push(para);
5744
+ }
5284
5745
  };
5285
5746
 
5286
5747
  for (const element of this.bodyElements) {
@@ -5296,6 +5757,14 @@ export class Document {
5296
5757
  }
5297
5758
  }
5298
5759
  }
5760
+
5761
+ // Generate w14:paraId and w14:textId for paragraphs that lack them (Word 2010+ requirement)
5762
+ for (const para of paragraphsNeedingIds) {
5763
+ para.formatting.paraId = generateUniqueParaId();
5764
+ if (!para.formatting.textId) {
5765
+ para.formatting.textId = generateUniqueParaId();
5766
+ }
5767
+ }
5299
5768
  }
5300
5769
 
5301
5770
  /**
@@ -5741,6 +6210,103 @@ export class Document {
5741
6210
  return this.numberingManager.createMultiLevelList();
5742
6211
  }
5743
6212
 
/**
 * Builds a bullet list from an array of items.
 *
 * A new bullet list definition is created with `createBulletList()` and
 * every item becomes one paragraph numbered against that definition (see
 * `addListItems`). Plain strings are placed at level 0; `{ text, level }`
 * objects choose their own nesting level.
 *
 * An empty `items` array returns `[]` without creating a list definition.
 *
 * @param items - Strings or `{ text, level }` objects, in display order
 * @param formatting - Optional run formatting passed to every item's text
 * @returns The paragraphs created for the list, in input order
 *
 * @example
 * ```typescript
 * // Flat list
 * doc.addBulletListFromArray(['First item', 'Second item', 'Third item']);
 *
 * // Nested list
 * doc.addBulletListFromArray([
 *   'Top level',
 *   { text: 'Nested item', level: 1 },
 *   { text: 'Deeper item', level: 2 },
 *   'Back to top',
 * ]);
 *
 * // Shared formatting
 * doc.addBulletListFromArray(['Bold item'], { bold: true });
 * ```
 */
addBulletListFromArray(
  items: (string | { text: string; level?: number })[],
  formatting?: RunFormatting
): Paragraph[] {
  if (items.length > 0) {
    const bulletNumId = this.createBulletList();
    return this.addListItems(bulletNumId, items, formatting);
  }

  // Nothing to render: do not register a list definition nobody uses.
  return [];
}
6250
+
/**
 * Builds a numbered list from an array of items.
 *
 * A new numbered list definition is created with `createNumberedList()` and
 * every item becomes one paragraph numbered against that definition (see
 * `addListItems`). Plain strings are placed at level 0; `{ text, level }`
 * objects choose their own nesting level.
 *
 * An empty `items` array returns `[]` without creating a list definition.
 *
 * @param items - Strings or `{ text, level }` objects, in display order
 * @param formatting - Optional run formatting passed to every item's text
 * @returns The paragraphs created for the list, in input order
 *
 * @example
 * ```typescript
 * // Flat numbered list
 * doc.addNumberedListFromArray(['First', 'Second', 'Third']);
 *
 * // Nested numbered list
 * doc.addNumberedListFromArray([
 *   'Chapter 1',
 *   { text: 'Section 1.1', level: 1 },
 *   { text: 'Section 1.2', level: 1 },
 *   'Chapter 2',
 * ]);
 * ```
 */
addNumberedListFromArray(
  items: (string | { text: string; level?: number })[],
  formatting?: RunFormatting
): Paragraph[] {
  if (items.length > 0) {
    const numberedNumId = this.createNumberedList();
    return this.addListItems(numberedNumId, items, formatting);
  }

  // Nothing to render: do not register a list definition nobody uses.
  return [];
}
6285
+
/**
 * Turns each list item into a paragraph numbered against `numId`.
 *
 * For every item a paragraph is obtained from `createParagraph()`, the item
 * text is added with the optional run formatting, and numbering is applied
 * at the item's level (0 for plain strings or when `level` is omitted).
 *
 * @param numId - Numbering instance id the paragraphs are attached to
 * @param items - Strings or `{ text, level }` objects, in display order
 * @param formatting - Optional run formatting passed to `addText`
 * @returns The created paragraphs, in input order
 * @internal
 */
private addListItems(
  numId: number,
  items: (string | { text: string; level?: number })[],
  formatting?: RunFormatting
): Paragraph[] {
  return items.map((entry) => {
    let content: string;
    let depth: number;
    if (typeof entry === 'string') {
      content = entry;
      depth = 0;
    } else {
      content = entry.text;
      depth = entry.level ?? 0;
    }

    const listParagraph = this.createParagraph();
    listParagraph.addText(content, formatting);
    listParagraph.setNumbering(numId, depth);
    return listParagraph;
  });
}
6309
+
5744
6310
  /**
5745
6311
  * Creates a new numbering instance that restarts numbering for an existing list
5746
6312
  *
@@ -6269,6 +6835,15 @@ export class Document {
6269
6835
  * });
6270
6836
  * ```
6271
6837
  */
/**
 * Removes the theme-font keys from a run configuration that names an
 * explicit font.
 *
 * When `runConfig.font` is truthy, the `fontAsciiTheme`, `fontHAnsiTheme`,
 * `fontEastAsiaTheme` and `fontCsTheme` properties are deleted from the
 * object in place. A nullish `runConfig`, or one without a font, is left
 * untouched.
 *
 * @param runConfig - Run configuration object to clean up (mutated)
 * @internal
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
private static stripThemeFontsIfExplicitFont(runConfig: any): void {
  if (!runConfig?.font) {
    return;
  }

  const themeFontKeys = ['fontAsciiTheme', 'fontHAnsiTheme', 'fontEastAsiaTheme', 'fontCsTheme'];
  for (const themeKey of themeFontKeys) {
    delete runConfig[themeKey];
  }
}
6846
+
6272
6847
  public applyStyles(options?: ApplyStylesOptions): {
6273
6848
  heading1: boolean;
6274
6849
  heading2: boolean;
@@ -6349,6 +6924,14 @@ export class Document {
6349
6924
  },
6350
6925
  };
6351
6926
 
6927
+ // Strip theme font attributes when an explicit font is provided,
6928
+ // because Word prioritizes theme fonts over explicit font names.
6929
+ Document.stripThemeFontsIfExplicitFont(h1Config.run);
6930
+ Document.stripThemeFontsIfExplicitFont(h2Config.run);
6931
+ Document.stripThemeFontsIfExplicitFont(h3Config.run);
6932
+ Document.stripThemeFontsIfExplicitFont(normalConfig.run);
6933
+ Document.stripThemeFontsIfExplicitFont(listParaConfig.run);
6934
+
6352
6935
  // Extract preserve blank lines option (defaults to true)
6353
6936
  const preserveBlankLines = options?.preserveBlankLinesAfterHeading2Tables ?? true;
6354
6937
 
@@ -7334,7 +7917,7 @@ export class Document {
7334
7917
  const fillPattern = new RegExp(`(w:fill=["'])${normalizedOld}(["'])`, 'gi');
7335
7918
 
7336
7919
  // Replace all occurrences
7337
- stylesXml = stylesXml.replace(fillPattern, (match, prefix, suffix) => {
7920
+ stylesXml = stylesXml.replace(fillPattern, (_match, prefix, suffix) => {
7338
7921
  updateCount++;
7339
7922
  return `${prefix}${normalizedNew}${suffix}`;
7340
7923
  });
@@ -7343,7 +7926,7 @@ export class Document {
7343
7926
  // Matches: w:color="A5A5A5" within shd elements
7344
7927
  const colorPattern = new RegExp(`(<w:shd[^>]*w:color=["'])${normalizedOld}(["'])`, 'gi');
7345
7928
 
7346
- stylesXml = stylesXml.replace(colorPattern, (match, prefix, suffix) => {
7929
+ stylesXml = stylesXml.replace(colorPattern, (_match, prefix, suffix) => {
7347
7930
  updateCount++;
7348
7931
  return `${prefix}${normalizedNew}${suffix}`;
7349
7932
  });
@@ -7413,27 +7996,6 @@ export class Document {
7413
7996
  * Helper method to process consecutive blank paragraphs
7414
7997
  * @private
7415
7998
  */
7416
- private processConsecutiveBlanks(
7417
- blanks: Paragraph[],
7418
- keepOne: boolean,
7419
- toRemove: Paragraph[]
7420
- ): void {
7421
- if (blanks.length === 0) return;
7422
-
7423
- if (keepOne && blanks.length > 1) {
7424
- // Keep the first one, remove the rest
7425
- for (let i = 1; i < blanks.length; i++) {
7426
- const blank = blanks[i];
7427
- if (blank) {
7428
- toRemove.push(blank);
7429
- }
7430
- }
7431
- } else if (!keepOne) {
7432
- // Remove all
7433
- toRemove.push(...blanks);
7434
- }
7435
- // If keepOne is true and there's only 1 blank, don't remove it
7436
- }
7437
7999
 
7438
8000
  /**
7439
8001
  * Standardizes all bullet list symbols formatting (font, size, bold, color)
@@ -8031,7 +8593,7 @@ export class Document {
8031
8593
  private parseTOCFieldInstruction(instrText: string): number[] {
8032
8594
  const levels = new Set<number>();
8033
8595
  let hasOutlineSwitch = false;
8034
- let hasTableSwitch = false;
8596
+ // hasTableSwitch tracked via \t switch parsing below
8035
8597
 
8036
8598
  // Normalize whitespace and quotes: trim input and replace &quot; with " for consistent parsing
8037
8599
  const normalizedText = instrText.trim().replace(/&quot;/g, '"');
@@ -8065,7 +8627,7 @@ export class Document {
8065
8627
  const tMatches = [...normalizedText.matchAll(tSwitchRegex)];
8066
8628
 
8067
8629
  for (const match of tMatches) {
8068
- hasTableSwitch = true;
8630
+ // \t switch found — heading levels extracted from table style mappings
8069
8631
  const content = (match[1] || '').trim();
8070
8632
  if (!content) continue;
8071
8633
 
@@ -8450,6 +9012,7 @@ export class Document {
8450
9012
  }
8451
9013
 
8452
9014
  // Helper function to extract heading info from a parsed paragraph object
9015
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
8453
9016
  const extractHeading = (para: any): void => {
8454
9017
  const pPr = para['w:pPr'];
8455
9018
  if (!pPr?.['w:pStyle']) {
@@ -8584,52 +9147,6 @@ export class Document {
8584
9147
  return headings;
8585
9148
  }
8586
9149
 
8587
- /**
8588
- * Legacy method - searches only bodyElements (doesn't search inside tables)
8589
- * Kept for compatibility but not recommended
8590
- * @deprecated Use findHeadingsForTOCFromXML instead
8591
- */
8592
- private findHeadingsForTOC(
8593
- levels: number[]
8594
- ): { level: number; text: string; bookmark: string }[] {
8595
- const headings: { level: number; text: string; bookmark: string }[] = [];
8596
- const levelSet = new Set(levels);
8597
-
8598
- // Iterate through body elements
8599
- for (const element of this.bodyElements) {
8600
- if (element instanceof Paragraph) {
8601
- const para = element;
8602
- const formatting = para.getFormatting();
8603
-
8604
- // Check if paragraph has a heading style (handle both "Heading1" and "Heading 1")
8605
- if (formatting.style) {
8606
- const styleMatch = /Heading\s*(\d+)/i.exec(formatting.style);
8607
- if (styleMatch?.[1]) {
8608
- const headingLevel = parseInt(styleMatch[1], 10);
8609
-
8610
- // Check if this level should be included in TOC
8611
- if (levelSet.has(headingLevel)) {
8612
- const text = para.getText().trim();
8613
-
8614
- if (text) {
8615
- // Create or get bookmark for this heading
8616
- const bookmark = this.bookmarkManager.createHeadingBookmark(text);
8617
-
8618
- headings.push({
8619
- level: headingLevel,
8620
- text: text,
8621
- bookmark: bookmark.getName(),
8622
- });
8623
- }
8624
- }
8625
- }
8626
- }
8627
- }
8628
- }
8629
-
8630
- return headings;
8631
- }
8632
-
8633
9150
  /**
8634
9151
  * Generates TOC XML structure with populated entries
8635
9152
  *
@@ -10109,7 +10626,8 @@ export class Document {
10109
10626
 
10110
10627
  /**
10111
10628
  * Checks whether a paragraph contains a hyperlink with `_top` anchor.
10112
- * Handles both inline Hyperlink elements and ComplexField HYPERLINK _top.
10629
+ * Handles inline Hyperlink elements, ComplexField HYPERLINK _top,
10630
+ * and PreservedElement raw XML passthrough (loaded docs).
10113
10631
  * @internal
10114
10632
  */
10115
10633
  private _paragraphHasTopLink(paragraph: Paragraph): boolean {
@@ -10123,7 +10641,11 @@ export class Document {
10123
10641
  return true;
10124
10642
  }
10125
10643
  }
10126
- }
10644
+ // Loaded docs may have hyperlinks as PreservedElement (raw XML passthrough)
10645
+ if (item instanceof PreservedElement && item.getRawXml().includes('w:anchor="_top"')) {
10646
+ return true;
10647
+ }
10648
+ }
10127
10649
  return false;
10128
10650
  }
10129
10651
 
@@ -10812,6 +11334,7 @@ export class Document {
10812
11334
  * @param element - Element to bind
10813
11335
  * @internal
10814
11336
  */
11337
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
10815
11338
  private bindTrackingToElement(element: any): void {
10816
11339
  // Set tracking context on element if it supports it
10817
11340
  if (element && typeof element._setTrackingContext === 'function') {
@@ -11051,6 +11574,7 @@ export class Document {
11051
11574
  this._settingsModified = true;
11052
11575
  }
11053
11576
 
11577
+ /** Gets whether even/odd page headers and footers are enabled (w:evenAndOddHeaders). */
11054
11578
  getEvenAndOddHeaders(): boolean {
11055
11579
  return this._evenAndOddHeaders ?? false;
11056
11580
  }
@@ -11144,6 +11668,72 @@ export class Document {
11144
11668
  this._modifiedBooleanSettings.add('defaultTabStop');
11145
11669
  }
11146
11670
 
/**
 * Sets the default document font by updating the Normal style.
 *
 * Looks up the `Normal` style (creating and registering a default paragraph
 * style with that id when it is missing), merges `font` — and `size` when
 * given — into its existing run formatting, and writes the result back.
 *
 * Theme-font keys (`fontAsciiTheme`, `fontHAnsiTheme`, `fontEastAsiaTheme`,
 * `fontCsTheme`) carried over from the existing formatting are removed when
 * a font name is supplied, because Word prioritizes theme fonts over
 * explicit font names and would otherwise ignore the new font.
 *
 * @param fontName - Font family name (e.g., 'Calibri', 'Times New Roman', 'Arial')
 * @param sizeInPoints - Optional font size in points (e.g., 11, 12, 14)
 * @returns This document for chaining
 *
 * @example
 * ```typescript
 * const doc = Document.create();
 * doc.setDefaultFont('Times New Roman', 12);
 * doc.createParagraph('This text will be in Times New Roman 12pt');
 * ```
 */
setDefaultFont(fontName: string, sizeInPoints?: number): this {
  let normalStyle = this.stylesManager.getStyle('Normal');
  if (!normalStyle) {
    normalStyle = new Style({
      styleId: 'Normal',
      name: 'Normal',
      type: 'paragraph',
      isDefault: true,
    });
    this.stylesManager.addStyle(normalStyle);
  }

  const existing = normalStyle.getRunFormatting() ?? {};
  // `updated` is a shallow copy, so stripping keys below never mutates the
  // object returned by getRunFormatting().
  const updated: RunFormatting = { ...existing, font: fontName };
  if (sizeInPoints !== undefined) {
    updated.size = sizeInPoints;
  }

  // Same rule applyStyles() follows: an explicit font must not be shadowed
  // by leftover theme-font attributes.
  Document.stripThemeFontsIfExplicitFont(updated);

  normalStyle.setRunFormatting(updated);
  return this;
}
11708
+
/**
 * Sets the default document font size by updating the Normal style.
 *
 * Looks up the `Normal` style (creating and registering a default paragraph
 * style with that id when it is missing) and writes back its existing run
 * formatting with `size` replaced by `sizeInPoints`.
 *
 * @param sizeInPoints - Font size in points (e.g., 10, 11, 12, 14)
 * @returns This document for chaining
 *
 * @example
 * ```typescript
 * doc.setDefaultFontSize(14);
 * ```
 */
setDefaultFontSize(sizeInPoints: number): this {
  const registry = this.stylesManager;

  let baseStyle = registry.getStyle('Normal');
  if (!baseStyle) {
    baseStyle = new Style({
      styleId: 'Normal',
      name: 'Normal',
      type: 'paragraph',
      isDefault: true,
    });
    registry.addStyle(baseStyle);
  }

  const currentFormatting = baseStyle.getRunFormatting() ?? {};
  const resized = { ...currentFormatting, size: sizeInPoints };
  baseStyle.setRunFormatting(resized);

  return this;
}
11736
+
11147
11737
  /**
11148
11738
  * Gets whether fields are updated on document open (w:updateFields)
11149
11739
  */
@@ -11627,19 +12217,31 @@ export class Document {
11627
12217
  return this.commentManager.getAllComments();
11628
12218
  }
11629
12219
 
12220
+ /** Returns the footnote manager for advanced footnote operations. */
11630
12221
  getFootnoteManager(): FootnoteManager {
11631
12222
  return this.footnoteManager;
11632
12223
  }
11633
12224
 
12225
+ /** Returns the endnote manager for advanced endnote operations. */
11634
12226
  getEndnoteManager(): EndnoteManager {
11635
12227
  return this.endnoteManager;
11636
12228
  }
11637
12229
 
12230
+ /**
12231
+ * Creates a new footnote with the given text and adds a reference in the document.
12232
+ * @param text - The footnote text content
12233
+ * @returns The created Footnote object
12234
+ */
11638
12235
  createFootnote(text: string): Footnote {
11639
12236
  this._footnotesModified = true;
11640
12237
  return this.footnoteManager.createFootnote(text);
11641
12238
  }
11642
12239
 
12240
+ /**
12241
+ * Creates a new endnote with the given text and adds a reference in the document.
12242
+ * @param text - The endnote text content
12243
+ * @returns The created Endnote object
12244
+ */
11643
12245
  createEndnote(text: string): Endnote {
11644
12246
  this._endnotesModified = true;
11645
12247
  return this.endnoteManager.createEndnote(text);
@@ -12413,6 +13015,34 @@ export class Document {
12413
13015
  return count;
12414
13016
  }
12415
13017
 
/**
 * Removes all body content from the document.
 *
 * Replaces the body element list with a new empty array. This method
 * touches nothing else on the document, so whatever is held outside the
 * body list (styles, numbering, settings, headers, footers, ...) is left
 * as it was, and new content can be added afterwards.
 *
 * @returns This document for chaining
 *
 * @example
 * ```typescript
 * // Clear and rebuild content
 * doc.clear();
 * doc.addHeading('Fresh Start', 1);
 * doc.createParagraph('New content here.');
 *
 * // Reset a loaded template
 * const template = await Document.load('template.docx');
 * template.clear();
 * ```
 */
clear(): this {
  // Assign a fresh array rather than truncating in place, so an array
  // reference obtained earlier is not emptied behind the holder's back.
  this.bodyElements = [];
  return this;
}
13045
+
12416
13046
  /**
12417
13047
  * Cleans up resources and clears all managers
12418
13048
  * Call this after saving in long-running processes to free memory
@@ -13889,6 +14519,153 @@ export class Document {
13889
14519
  return trackChanges ? { count, revisions } : { count };
13890
14520
  }
13891
14521
 
/**
 * Fills template placeholders with values using cross-run replacement.
 *
 * For every entry in `data`, the placeholder `open + key + close` is
 * replaced in each paragraph returned by `getAllParagraphs()` via
 * `Paragraph.replaceTextCrossRun`, so placeholders that Word has split
 * across several runs (e.g. `{{` / `name` / `}}`) are still matched.
 *
 * Notes:
 * - Entries are applied one after another in `Object.entries(data)` order.
 *   A value that itself contains the placeholder of a *later* key will be
 *   substituted again when that key is processed.
 * - Entries whose value is `null` or `undefined` are skipped, leaving the
 *   placeholder in place. Other non-string values (possible from untyped
 *   callers) are converted with `String()`.
 * - An empty placeholder (empty delimiters combined with an empty key) is
 *   skipped instead of being used as a search string.
 *
 * @param data - Key-value pairs where keys match placeholder names
 * @param options - Template options
 * @param options.delimiters - Custom open/close delimiters (default: `['{{', '}}']`)
 * @returns Sum of the replacement counts reported by `replaceTextCrossRun`
 *
 * @example
 * ```typescript
 * // Document contains: "Dear {{name}}, your order {{orderId}} is ready."
 * const count = doc.fillTemplate({
 *   name: 'Alice',
 *   orderId: 'ORD-12345',
 * });
 * // Result: "Dear Alice, your order ORD-12345 is ready."
 * ```
 *
 * @example
 * ```typescript
 * // Custom delimiters
 * doc.fillTemplate(
 *   { title: 'Report', date: '2024-01-15' },
 *   { delimiters: ['<<', '>>'] }
 * );
 * ```
 */
fillTemplate(data: Record<string, string>, options?: { delimiters?: [string, string] }): number {
  const [open, close] = options?.delimiters ?? ['{{', '}}'];
  const allParagraphs = this.getAllParagraphs();
  let totalCount = 0;

  for (const [key, rawValue] of Object.entries(data)) {
    // The declared type is string, but JavaScript callers can pass anything.
    const value: unknown = rawValue;
    if (value === undefined || value === null) continue;

    const placeholder = `${open}${key}${close}`;
    // An empty search string has no meaningful match; never search for it.
    if (placeholder.length === 0) continue;

    const replacement = typeof value === 'string' ? value : String(value);
    for (const para of allParagraphs) {
      totalCount += para.replaceTextCrossRun(placeholder, replacement);
    }
  }

  return totalCount;
}
14572
+
/**
 * Finds all occurrences of text and applies a highlight color.
 *
 * Convenience wrapper around `findAndFormat()`: it forwards `text` and
 * `options` unchanged and passes `{ highlight: color }` as the formatting.
 *
 * @param text - Text to search for
 * @param color - Highlight color (default: 'yellow')
 * @param options - Search options, forwarded to `findAndFormat()`
 * @param options.caseSensitive - Whether matching is case-sensitive
 * @returns The value returned by `findAndFormat()` (number of matches)
 *
 * @example
 * ```typescript
 * // Highlight all occurrences of "important" in yellow
 * doc.findAndHighlight('important');
 *
 * // Red highlight, case-sensitive
 * doc.findAndHighlight('ERROR', 'red', { caseSensitive: true });
 * ```
 */
findAndHighlight(
  text: string,
  color:
    | 'yellow'
    | 'green'
    | 'cyan'
    | 'magenta'
    | 'blue'
    | 'red'
    | 'darkBlue'
    | 'darkCyan'
    | 'darkGreen'
    | 'darkMagenta'
    | 'darkRed'
    | 'darkYellow'
    | 'darkGray'
    | 'lightGray'
    | 'black' = 'yellow',
  options?: { caseSensitive?: boolean }
): number {
  const highlightOnly = { highlight: color };
  const highlighted = this.findAndFormat(text, highlightOnly, options);
  return highlighted;
}
14617
+
/**
 * Finds all occurrences of text and applies formatting.
 *
 * For each paragraph returned by `getAllParagraphs()`, matches are located
 * with `Paragraph.findTextCrossRun` (so text split across runs is found)
 * and `formatting` is applied to every match through
 * `Paragraph.applyFormattingToRange`. This is the general-purpose version
 * of `findAndHighlight()`.
 *
 * An empty `text` matches nothing: the method returns 0 without touching
 * the document.
 *
 * @param text - Text to search for
 * @param formatting - RunFormatting to apply to matches
 * @param options - Search options, forwarded to `findTextCrossRun`
 * @param options.caseSensitive - Whether matching is case-sensitive
 * @returns Number of matches formatted
 *
 * @example
 * ```typescript
 * // Bold all occurrences of "warning"
 * doc.findAndFormat('warning', { bold: true, color: 'FF0000' });
 *
 * // Strikethrough deprecated terms
 * doc.findAndFormat('deprecated', { strike: true, color: '888888' });
 *
 * // Apply multiple styles to a term
 * doc.findAndFormat('critical', {
 *   bold: true,
 *   highlight: 'red',
 *   underline: 'single',
 * });
 * ```
 */
findAndFormat(
  text: string,
  formatting: Partial<RunFormatting>,
  options?: { caseSensitive?: boolean }
): number {
  // Guard the degenerate search: there is no range to format for ''.
  if (text.length === 0) return 0;

  let totalMatches = 0;

  for (const para of this.getAllParagraphs()) {
    const matches = para.findTextCrossRun(text, options);

    // Walk the matches last-to-first so that formatting a later range
    // cannot disturb the offsets of the ranges still to be processed.
    for (const match of [...matches].reverse()) {
      para.applyFormattingToRange(match.offset, match.offset + match.text.length, formatting);
    }

    totalMatches += matches.length;
  }

  return totalMatches;
}
14668
+
13892
14669
  /**
13893
14670
  * Gets the total word count in the document
13894
14671
  *
@@ -14010,6 +14787,800 @@ export class Document {
14010
14787
  return totalChars;
14011
14788
  }
14012
14789
 
/**
 * Returns comprehensive document statistics in a single call.
 *
 * Text metrics (`words`, `characters`, `charactersNoSpaces`) and the
 * `headings` / `lists` / `paragraphs` counters are computed over the set of
 * distinct paragraphs reached through `getAllParagraphs()` plus the cells
 * of every table returned by `getTables()`. Each paragraph object is
 * tallied at most once, and all of these counters are derived from that
 * same set so they stay consistent with each other.
 *
 * The remaining counts are read from the respective managers/getters.
 * `sections` is currently always reported as 1.
 *
 * @returns Object with all document metrics
 *
 * @example
 * ```typescript
 * const stats = doc.getStatistics();
 * console.log(`Words: ${stats.words}, Paragraphs: ${stats.paragraphs}`);
 * console.log(`Tables: ${stats.tables}, Images: ${stats.images}`);
 * ```
 */
getStatistics(): {
  words: number;
  characters: number;
  charactersNoSpaces: number;
  paragraphs: number;
  tables: number;
  images: number;
  headings: number;
  lists: number;
  hyperlinks: number;
  bookmarks: number;
  footnotes: number;
  endnotes: number;
  comments: number;
  sections: number;
} {
  const tables = this.getTables();

  let words = 0;
  let characters = 0;
  let charactersNoSpaces = 0;
  let headings = 0;
  let lists = 0;
  const counted = new Set<Paragraph>();

  // Single tally routine shared by both traversals, so a paragraph found
  // only through a table cell contributes to every counter, not just text.
  const tally = (para: Paragraph): void => {
    if (counted.has(para)) return;
    counted.add(para);

    const text = para.getText();
    characters += text.length;
    charactersNoSpaces += text.replace(/\s/g, '').length;

    const trimmed = text.trim();
    if (trimmed) {
      // A trimmed, non-empty string split on whitespace runs has no empty parts.
      words += trimmed.split(/\s+/).length;
    }

    if (para.detectHeadingLevel() !== null) headings++;
    if (para.hasNumbering()) lists++;
  };

  for (const para of this.getAllParagraphs()) {
    tally(para);
  }

  // Cover table cell paragraphs too (for tables not traversed via getAllParagraphs)
  for (const table of tables) {
    for (const row of table.getRows()) {
      for (const cell of row.getCells()) {
        for (const para of cell.getParagraphs()) {
          tally(para);
        }
      }
    }
  }

  return {
    words,
    characters,
    charactersNoSpaces,
    paragraphs: counted.size,
    tables: tables.length,
    images: this.imageManager.getAllImages().length,
    headings,
    lists,
    hyperlinks: this.getHyperlinks().length,
    bookmarks: this.bookmarkManager.getAllBookmarks().length,
    footnotes: this.footnoteManager.getAllFootnotes().length,
    endnotes: this.endnoteManager.getAllEndnotes().length,
    comments: this.commentManager.getAllComments().length,
    sections: 1, // Base section; multi-section docs add via paragraph section properties
  };
}
14885
+
/**
 * Iterates over top-level paragraphs in the document body.
 *
 * Walks `bodyElements` in order and invokes `callback` for every element
 * that is a `Paragraph`. Only direct body children are visited; this method
 * does not descend into tables. The index passed to the callback counts
 * paragraphs only (0 for the first paragraph, 1 for the second, ...).
 * Returning `false` from the callback stops the iteration.
 *
 * @param callback - Function called for each paragraph. Return `false` to stop.
 * @returns Number of paragraphs visited, including the one that stopped the walk
 *
 * @example
 * ```typescript
 * // Bold all top-level paragraphs
 * doc.forEachParagraph((para) => {
 *   para.getRuns().forEach(r => r.setBold(true));
 * });
 *
 * // Find first paragraph matching criteria
 * let found: Paragraph | undefined;
 * doc.forEachParagraph((para) => {
 *   if (para.getText().includes('Summary')) {
 *     found = para;
 *     return false;
 *   }
 * });
 * ```
 */
forEachParagraph(callback: (paragraph: Paragraph, index: number) => void | false): number {
  // Doubles as the paragraph index handed to the callback.
  let visited = 0;

  for (const node of this.bodyElements) {
    if (!(node instanceof Paragraph)) continue;

    const verdict = callback(node, visited);
    visited += 1;
    if (verdict === false) break;
  }

  return visited;
}
14926
+
/**
 * Iterates over top-level tables in the document body.
 *
 * Walks `bodyElements` in order and invokes `callback` for every element
 * that is a `Table`. The index passed to the callback counts tables only
 * (0 for the first table, 1 for the second, ...). Returning `false` from
 * the callback stops the iteration.
 *
 * @param callback - Function called for each table. Return `false` to stop.
 * @returns Number of tables visited, including the one that stopped the walk
 *
 * @example
 * ```typescript
 * // Remove empty rows from all tables
 * doc.forEachTable((table) => {
 *   table.removeEmptyRows();
 * });
 *
 * // Find first table with more than 5 rows
 * let bigTable: Table | undefined;
 * doc.forEachTable((table) => {
 *   if (table.getRowCount() > 5) {
 *     bigTable = table;
 *     return false;
 *   }
 * });
 * ```
 */
forEachTable(callback: (table: Table, index: number) => void | false): number {
  // Doubles as the table index handed to the callback.
  let visited = 0;

  for (const node of this.bodyElements) {
    if (!(node instanceof Table)) continue;

    const verdict = callback(node, visited);
    visited += 1;
    if (verdict === false) break;
  }

  return visited;
}
14966
+
/**
 * Extracts the document text as a single plain string.
 *
 * Takes the text of every paragraph returned by `getAllParagraphs()`, in
 * that order, and joins the pieces with `separator`.
 *
 * @param separator - String to insert between paragraphs (default: '\n')
 * @returns The joined paragraph texts
 *
 * @example
 * ```typescript
 * const text = doc.toPlainText();
 * console.log(text);
 *
 * // With custom separator
 * const singleLine = doc.toPlainText(' ');
 * ```
 */
toPlainText(separator = '\n'): string {
  const pieces: string[] = [];
  for (const para of this.getAllParagraphs()) {
    pieces.push(para.getText());
  }
  return pieces.join(separator);
}
14988
+
/**
 * Converts the document to Markdown format.
 *
 * Walks `bodyElements` in order:
 * - each `Paragraph` is rendered with `paragraphToMarkdown()`; a `null`
 *   result is skipped, anything else is emitted followed by a blank line
 * - each `Table` is rendered with `tableToMarkdown()` and followed by a
 *   blank line
 * - every other element type is ignored
 *
 * Trailing blank lines are removed and the lines are joined with `\n`.
 *
 * @returns Markdown string representation of the document
 *
 * @example
 * ```typescript
 * const md = doc.toMarkdown();
 * console.log(md);
 * // # Document Title
 * //
 * // Opening paragraph text.
 * //
 * // ## Section 1
 * //
 * // | Name | Age |
 * // | --- | --- |
 * // | Alice | 30 |
 * ```
 */
toMarkdown(): string {
  const output: string[] = [];

  for (const node of this.bodyElements) {
    if (node instanceof Paragraph) {
      const rendered = this.paragraphToMarkdown(node);
      if (rendered !== null) {
        output.push(rendered, '');
      }
      continue;
    }

    if (node instanceof Table) {
      output.push(...this.tableToMarkdown(node), '');
    }
    // Anything else (SDT, AlternateContent, etc.) produces no output.
  }

  // Find the end of the content, ignoring blank lines at the tail.
  let end = output.length;
  while (end > 0 && output[end - 1] === '') {
    end--;
  }

  return output.slice(0, end).join('\n');
}
15044
+
/**
 * Converts a paragraph to a single Markdown line.
 *
 * - Returns `null` when the paragraph renders to an empty string and has
 *   no numbering.
 * - A detected heading level between 1 and 6 yields that many `#`
 *   characters, a space, and the text.
 * - A numbered paragraph yields `- text` when its style name contains
 *   "bullet" (case-insensitive) and `1. text` otherwise.
 * - Everything else is returned as the rendered text.
 *
 * @param para - Paragraph to convert
 * @returns The Markdown line, or `null` when the paragraph is skipped
 * @internal
 */
private paragraphToMarkdown(para: Paragraph): string | null {
  const text = this.paragraphContentToMarkdown(para);
  if (text === '' && !para.hasNumbering()) {
    return null;
  }

  // Headings: only levels 1-6 are mapped to `#` prefixes here.
  const level = para.detectHeadingLevel();
  const isMarkdownHeading = level !== null && level >= 1 && level <= 6;
  if (isMarkdownHeading) {
    return '#'.repeat(level) + ' ' + text;
  }

  if (!para.hasNumbering()) {
    return text;
  }

  // Lists: the style name is the only bullet/numbered signal consulted.
  // A name containing 'list bullet' necessarily contains 'bullet'.
  const styleName = para.getStyle()?.toLowerCase();
  if (styleName?.includes('bullet')) {
    return `- ${text}`;
  }
  return `1. ${text}`;
}
15069
+
/**
 * Converts paragraph inline content to Markdown with formatting.
 *
 * Handles the items returned by `para.getContent()`:
 * - `Run`: empty text is skipped. A run whose font name starts with a known
 *   monospace family is emitted as an inline code span of its raw text
 *   (other formatting is not applied to code). Otherwise bold/italic map
 *   to `**` / `*` / `***` and strike to `~~`.
 * - `Hyperlink`: emitted as `[text](url)`, using the URL as text when the
 *   link has no text.
 * - Other item types are ignored.
 *
 * Emphasis and strike markers are placed around the run's non-whitespace
 * core, with leading/trailing whitespace kept outside the markers.
 * Markdown does not recognise a closing marker that is preceded by
 * whitespace (or an opening marker followed by it), so wrapping a run such
 * as `"bold "` verbatim would print literal asterisks. Whitespace-only runs
 * are emitted unchanged.
 *
 * @param para - Paragraph whose inline content is rendered
 * @returns Concatenated Markdown for the paragraph's inline content
 * @internal
 */
private paragraphContentToMarkdown(para: Paragraph): string {
  const monospaceFont =
    /^(courier|consolas|monaco|menlo|source code|fira code|jetbrains mono)/i;
  const parts: string[] = [];

  for (const item of para.getContent()) {
    if (item instanceof Run) {
      const runText = item.getText();
      if (!runText) continue;

      const fmt = item.getFormatting();

      // Inline code (monospace font detection) takes precedence over
      // every other inline style.
      if (fmt.font && monospaceFont.test(fmt.font)) {
        parts.push(`\`${runText}\``);
        continue;
      }

      let emphasis = '';
      if (fmt.bold && fmt.italic) {
        emphasis = '***';
      } else if (fmt.bold) {
        emphasis = '**';
      } else if (fmt.italic) {
        emphasis = '*';
      }
      const strike = fmt.strike ? '~~' : '';

      const core = runText.trim();
      if (core === '' || (emphasis === '' && strike === '')) {
        // Nothing to wrap, or nothing visible to wrap it around.
        parts.push(runText);
        continue;
      }

      // Keep surrounding whitespace outside the markers so they stay valid.
      const leading = runText.slice(0, runText.length - runText.trimStart().length);
      const trailing = runText.slice(runText.trimEnd().length);
      parts.push(`${leading}${strike}${emphasis}${core}${emphasis}${strike}${trailing}`);
    } else if (item instanceof Hyperlink) {
      const url = item.getUrl() || '';
      const linkText = item.getText() || url;
      parts.push(`[${linkText}](${url})`);
    }
    // Revisions, fields, shapes, etc. contribute nothing here.
  }

  return parts.join('');
}
15117
+
/**
 * Renders a table as pipe-table Markdown lines.
 *
 * The first row of `table.toArray()` is used as the header row and is
 * followed by a `---` separator row. Shorter rows are right-padded with
 * empty cells up to the widest row. Inside each cell, `|` is escaped as
 * `\|`, newlines become spaces, and surrounding whitespace is trimmed.
 *
 * @param table - Table to render
 * @returns One string per Markdown line; empty when the table has no rows
 *   or no columns
 * @internal
 */
private tableToMarkdown(table: Table): string[] {
  const grid = table.toArray();
  if (grid.length === 0) {
    return [];
  }

  const width = Math.max(...grid.map((cells) => cells.length));
  if (width === 0) {
    return [];
  }

  const formatRow = (cells: string[]): string => '| ' + cells.join(' | ') + ' |';

  // Pad every row to the common width, then sanitise each cell.
  const cleaned = grid.map((cells) => {
    const full = [...cells];
    while (full.length < width) {
      full.push('');
    }
    return full.map((cell) => cell.replace(/\|/g, '\\|').replace(/\n/g, ' ').trim());
  });

  const header = cleaned[0]!;
  const output: string[] = [formatRow(header), formatRow(header.map(() => '---'))];

  for (const row of cleaned.slice(1)) {
    output.push(formatRow(row));
  }

  return output;
}
15152
+
/**
 * Converts the document body to an HTML string.
 *
 * Body elements are rendered in order, one output line per tag group:
 * - Paragraphs with heading level 1-6 → `<h1>` ... `<h6>`
 * - Paragraphs whose style name contains "bullet" → `<li>` inside `<ul>`
 * - Paragraphs whose style name contains "listnumber" or "list number"
 *   → `<li>` inside `<ol>`
 * - Other paragraphs with non-empty content → `<p>`
 * - Tables → `<table>` with `<thead>` / `<tbody>`
 *
 * List membership is decided from the style name only (case-insensitive)
 * and is checked before the heading level. Consecutive items of the same
 * list kind share one `<ul>` / `<ol>`; any other paragraph or a table closes
 * the open list. Body elements that are neither paragraphs nor tables are
 * skipped without closing an open list.
 *
 * @param options - Output options
 * @param options.wrapInDocument - Wrap the fragment in a full
 *   `<!DOCTYPE html>` page with head and body (default: false)
 * @param options.title - Text for the `<title>` tag, HTML-escaped; used only
 *   with `wrapInDocument`, and replaced by "Document" when missing or empty
 * @returns HTML string
 *
 * @example
 * ```typescript
 * // Fragment for embedding
 * const html = doc.toHTML();
 *
 * // Full HTML document
 * const page = doc.toHTML({ wrapInDocument: true, title: 'My Report' });
 * ```
 */
toHTML(options?: { wrapInDocument?: boolean; title?: string }): string {
  const out: string[] = [];
  let openList: 'ul' | 'ol' | null = null;

  // Emits the closing tag of the list currently open, if any.
  const endList = (): void => {
    if (openList !== null) {
      out.push(`</${openList}>`);
      openList = null;
    }
  };

  for (const element of this.bodyElements) {
    if (element instanceof Paragraph) {
      const level = element.detectHeadingLevel();
      const lowered = element.getStyle()?.toLowerCase();
      const bullet = lowered?.includes('bullet');
      const numbered = lowered?.includes('listnumber') || lowered?.includes('list number');

      if (bullet || numbered) {
        const tag = bullet ? 'ul' : 'ol';
        if (openList !== tag) {
          // Switching list kind (or starting one) opens a new wrapper.
          endList();
          openList = tag;
          out.push(`<${tag}>`);
        }
        out.push(`<li>${this.paragraphContentToHTML(element)}</li>`);
        continue;
      }

      endList();

      const inner = this.paragraphContentToHTML(element);
      if (level !== null && level >= 1 && level <= 6) {
        // Headings are emitted even when they have no content.
        out.push(`<h${level}>${inner}</h${level}>`);
      } else if (inner) {
        out.push(`<p>${inner}</p>`);
      }
    } else if (element instanceof Table) {
      endList();
      out.push(this.tableToHTML(element));
    }
  }

  endList();

  const fragment = out.join('\n');

  if (options?.wrapInDocument) {
    const pageTitle = options.title ? this.escapeHTML(options.title) : 'Document';
    const page = [
      '<!DOCTYPE html>',
      '<html>',
      '<head>',
      `<meta charset="utf-8">`,
      `<title>${pageTitle}</title>`,
      '</head>',
      '<body>',
      fragment,
      '</body>',
      '</html>',
    ];
    return page.join('\n');
  }

  return fragment;
}
15253
+
/**
 * Renders a paragraph's inline content as HTML.
 *
 * Only `Run` and `Hyperlink` items are rendered; other content items are
 * skipped. All text and URLs are passed through `escapeHTML` exactly once.
 *
 * Run formatting:
 * - runs whose font name starts with a known monospace family → `<code>`,
 *   with no other formatting tags
 * - otherwise bold → `<strong>`, italic → `<em>`, strikethrough → `<s>`,
 *   underline other than `'none'` → `<u>`, nested in that order from the
 *   inside out
 *
 * Hyperlinks become `<a href="...">`; a link without display text shows its
 * URL. The URL scheme is not validated here.
 *
 * @param para - Paragraph whose content is rendered
 * @returns Concatenated HTML for all rendered items
 * @internal
 */
private paragraphContentToHTML(para: Paragraph): string {
  const parts: string[] = [];

  for (const item of para.getContent()) {
    if (item instanceof Run) {
      const text = item.getText();
      if (!text) continue;

      const escaped = this.escapeHTML(text);
      const fmt = item.getFormatting();

      // Detect monospace font
      const isMono =
        fmt.font &&
        /^(courier|consolas|monaco|menlo|source code|fira code|jetbrains mono)/i.test(fmt.font);

      if (isMono) {
        parts.push(`<code>${escaped}</code>`);
        continue;
      }

      let html = escaped;
      if (fmt.bold) html = `<strong>${html}</strong>`;
      if (fmt.italic) html = `<em>${html}</em>`;
      if (fmt.strike) html = `<s>${html}</s>`;
      if (fmt.underline && fmt.underline !== 'none') {
        html = `<u>${html}</u>`;
      }

      parts.push(html);
    } else if (item instanceof Hyperlink) {
      const rawUrl = item.getUrl() || '';
      // Fall back to the RAW url: escaping an already-escaped url would turn
      // `a&b` into `a&amp;amp;b` in the visible link text.
      const rawText = item.getText() || rawUrl;
      parts.push(`<a href="${this.escapeHTML(rawUrl)}">${this.escapeHTML(rawText)}</a>`);
    }
  }

  return parts.join('');
}
15297
+
/**
 * Renders a table as an HTML `<table>` string, one tag per line.
 *
 * The first row is always emitted as `<thead>` with `<th>` cells. Remaining
 * rows, if any, go into `<tbody>` with `<td>` cells. Cell content is the
 * cell's plain text, HTML-escaped; inline formatting is not carried over.
 *
 * @param table - Table to render
 * @returns HTML for the table, or an empty string when it has no rows
 * @internal
 */
private tableToHTML(table: Table): string {
  const rows = table.getRows();
  if (rows.length === 0) {
    return '';
  }

  const html: string[] = ['<table>'];

  // Appends one `<tr>` whose cells all use the given tag name.
  const emitRow = (row: (typeof rows)[number], cellTag: 'th' | 'td'): void => {
    html.push('<tr>');
    for (const cell of row.getCells()) {
      html.push(`<${cellTag}>${this.escapeHTML(cell.getText())}</${cellTag}>`);
    }
    html.push('</tr>');
  };

  html.push('<thead>');
  emitRow(rows[0]!, 'th');
  html.push('</thead>');

  const bodyRows = rows.slice(1);
  if (bodyRows.length > 0) {
    html.push('<tbody>');
    for (const row of bodyRows) {
      emitRow(row, 'td');
    }
    html.push('</tbody>');
  }

  html.push('</table>');
  return html.join('\n');
}
15334
+
/**
 * Escapes the characters `&`, `<`, `>` and `"` as HTML entities.
 *
 * Single quotes are left untouched, so the result is safe in element content
 * and in double-quoted attribute values only.
 *
 * @param text - Raw text
 * @returns Text with the four characters replaced by their entities
 * @internal
 */
private escapeHTML(text: string): string {
  return text.replace(/[&<>"]/g, (ch) => {
    switch (ch) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      default:
        // The only remaining character the pattern can match is `"`.
        return '&quot;';
    }
  });
}
15346
+
/**
 * Builds a JSON-serializable summary of the document.
 *
 * The summary holds the document properties, element counts, the list of
 * headings (level and text), and one entry per body element:
 * - paragraphs: `type: 'paragraph'` with their text and style
 * - tables: `type: 'table'` with a `"<rows> rows x <cols> cols"` text, where
 *   the column count is taken from the first row
 * - anything else: only `type`, set to the element's constructor name
 *
 * `stats.sections` counts `Section` instances among the body elements and
 * reports 1 when there are none.
 *
 * @returns Object with document properties, statistics, and content summary
 *
 * @example
 * ```typescript
 * const json = doc.toJSON();
 * console.log(JSON.stringify(json, null, 2));
 * ```
 */
toJSON(): {
  properties: DocumentProperties;
  stats: {
    paragraphs: number;
    tables: number;
    images: number;
    headings: number;
    sections: number;
  };
  headings: { level: number; text: string }[];
  body: { type: string; text?: string; style?: string }[];
} {
  const allParagraphs = this.getAllParagraphs();
  const allTables = this.getTables();
  const outline = this.getHeadingHierarchy();

  const properties = this.getProperties();
  const stats = {
    paragraphs: allParagraphs.length,
    tables: allTables.length,
    images: this.imageManager.getImageCount(),
    headings: outline.length,
    sections: this.bodyElements.filter((el) => el instanceof Section).length || 1,
  };
  const headings = outline.map(({ level, text }) => ({ level, text }));

  const body = this.bodyElements.map((el) => {
    if (el instanceof Paragraph) {
      return { type: 'paragraph', text: el.getText(), style: el.getStyle() };
    }
    if (el instanceof Table) {
      const rows = el.getRows();
      const columns = rows[0]?.getCells().length ?? 0;
      return { type: 'table', text: `${rows.length} rows x ${columns} cols` };
    }
    return { type: el.constructor.name };
  });

  return { properties, stats, headings, body };
}
15403
+
/**
 * Collects images whose alt text is empty or equal to the literal `'Image'`.
 *
 * Every paragraph returned by `getAllParagraphs()` is scanned. Image runs
 * are checked both when they sit directly in the paragraph content and when
 * they sit directly inside a `Revision` item of that paragraph.
 *
 * @returns Image elements missing meaningful alt text, in scan order
 *
 * @example
 * ```typescript
 * const missing = doc.findImagesWithoutAltText();
 * console.log(`${missing.length} images need alt text`);
 * for (const img of missing) {
 *   img.setAltText('Description of the image');
 * }
 * ```
 */
findImagesWithoutAltText(): Image[] {
  const missing: Image[] = [];

  // Records the image behind `candidate` when it is an image run lacking useful alt text.
  const inspect = (candidate: unknown): void => {
    if (!(candidate instanceof ImageRun)) {
      return;
    }
    const image = candidate.getImageElement();
    const alt = image.getAltText();
    if (!alt || alt === 'Image') {
      missing.push(image);
    }
  };

  for (const para of this.getAllParagraphs()) {
    for (const item of para.getContent()) {
      inspect(item);
      if (item instanceof Revision) {
        for (const nested of item.getContent()) {
          inspect(nested);
        }
      }
    }
  }

  return missing;
}
15445
+
/**
 * Lists every heading paragraph of the document as a flat array.
 *
 * A paragraph counts as a heading when `detectHeadingLevel()` returns a
 * non-null level. Entries follow the order of `getAllParagraphs()` and carry
 * the level, the paragraph text, and the paragraph object itself.
 *
 * @returns Array of heading entries
 *
 * @example
 * ```typescript
 * const headings = doc.getHeadingHierarchy();
 * for (const h of headings) {
 *   console.log(`${' '.repeat(h.level - 1)}H${h.level}: ${h.text}`);
 * }
 *
 * // Check for skipped levels (accessibility issue)
 * for (let i = 1; i < headings.length; i++) {
 *   if (headings[i].level - headings[i - 1].level > 1) {
 *     console.warn(`Skipped heading level: H${headings[i - 1].level} -> H${headings[i].level}`);
 *   }
 * }
 * ```
 */
getHeadingHierarchy(): { level: number; text: string; paragraph: Paragraph }[] {
  const outline: { level: number; text: string; paragraph: Paragraph }[] = [];

  for (const paragraph of this.getAllParagraphs()) {
    const level = paragraph.detectHeadingLevel();
    if (level === null) {
      continue;
    }
    outline.push({ level, text: paragraph.getText(), paragraph });
  }

  return outline;
}
15482
+
/**
 * Splits the body into sections that each start at a heading.
 *
 * Body elements are walked in order. A paragraph whose detected heading
 * level is less than or equal to `maxLevel` starts a new section; it becomes
 * that section's `heading` and is not repeated in `content`. Every other
 * element, including deeper headings, is appended to the current section.
 *
 * Elements that come before the first matching heading form a leading
 * section with `heading: undefined` and `level: 0`. That leading section is
 * omitted when it would be empty.
 *
 * @param maxLevel - Deepest heading level that starts a section (default: 1,
 *   so only H1 splits). Use 2 to split on H1 and H2, 3 for H1-H3, etc.
 * @returns Sections in document order, each with its heading paragraph,
 *   heading level, and content elements
 *
 * @example
 * ```typescript
 * // Split document by H1 headings (chapters)
 * const chapters = doc.extractByHeading(1);
 * for (const chapter of chapters) {
 *   console.log(`Chapter: ${chapter.heading?.getText() ?? '(preamble)'}`);
 *   console.log(`  ${chapter.content.length} elements`);
 * }
 * ```
 *
 * @example
 * ```typescript
 * // Split by H1 and H2 (chapters and sections)
 * const sections = doc.extractByHeading(2);
 *
 * // Look up a specific section by its heading text
 * const target = sections.find(s => s.heading?.getText() === 'Methods');
 * ```
 */
extractByHeading(maxLevel = 1): {
  heading: Paragraph | undefined;
  level: number;
  content: BodyElement[];
}[] {
  type Group = { heading: Paragraph | undefined; level: number; content: BodyElement[] };

  const groups: Group[] = [];
  let open: Group = { heading: undefined, level: 0, content: [] };

  // Stores the open group unless it is the still-empty leading group.
  const commit = (): void => {
    if (open.heading !== undefined || open.content.length > 0) {
      groups.push(open);
    }
  };

  for (const element of this.bodyElements) {
    if (element instanceof Paragraph) {
      const level = element.detectHeadingLevel();
      if (level !== null && level <= maxLevel) {
        commit();
        open = { heading: element, level, content: [] };
        continue;
      }
    }
    open.content.push(element);
  }

  commit();
  return groups;
}
15554
+
/**
 * Returns the body elements strictly between two reference elements.
 *
 * Both references are located by identity in the body element list; neither
 * of them is part of the result. An empty array is returned when either
 * reference is missing, when both are the same element, or when the start
 * comes after the end.
 *
 * @param startElement - Element after which to begin collecting
 * @param endElement - Element before which to stop collecting
 * @returns New array with the elements between the two references
 *
 * @example
 * ```typescript
 * const headings = doc.getParagraphs().filter(p => p.detectHeadingLevel() === 1);
 * const chapter1Content = doc.getElementsBetween(headings[0], headings[1]);
 * ```
 */
getElementsBetween(startElement: BodyElement, endElement: BodyElement): BodyElement[] {
  const from = this.bodyElements.indexOf(startElement);
  const to = this.bodyElements.indexOf(endElement);

  const isValidRange = from !== -1 && to !== -1 && from < to;
  return isValidRange ? this.bodyElements.slice(from + 1, to) : [];
}
15583
+
14013
15584
  /**
14014
15585
  * Removes a paragraph from the document
14015
15586
  * @param paragraphOrIndex - The paragraph object or its index
@@ -14454,6 +16025,122 @@ export class Document {
14454
16025
  return false;
14455
16026
  }
14456
16027
 
/**
 * Removes a body element given a reference to it.
 *
 * The element is matched by identity in the body element list and spliced
 * out. Use this instead of the index-based `removeBodyElementAt()` when the
 * element object is already at hand.
 *
 * @param element - The element to remove
 * @returns True if removed, false if the element is not in the body list
 *
 * @example
 * ```typescript
 * // Remove a specific paragraph
 * const para = doc.getParagraphs().find(p => p.getText() === 'Delete me');
 * if (para) doc.removeElement(para);
 *
 * // Remove all tables
 * for (const table of doc.getTables()) {
 *   doc.removeElement(table);
 * }
 * ```
 */
removeElement(element: BodyElement): boolean {
  const position = this.bodyElements.indexOf(element);
  const found = position !== -1;
  if (found) {
    this.bodyElements.splice(position, 1);
  }
  return found;
}
16056
+
16057
+ /**
16058
+ * Inserts a body element after a reference element
16059
+ *
16060
+ * Finds the reference element in the body and inserts the new element
16061
+ * immediately after it. Returns false if the reference is not found.
16062
+ *
16063
+ * @param reference - The existing element to insert after
16064
+ * @param element - The element to insert
16065
+ * @returns True if inserted, false if reference not found
16066
+ *
16067
+ * @example
16068
+ * ```typescript
16069
+ * // Find a heading and insert a table after it
16070
+ * const heading = doc.getParagraphs().find(p => p.getText() === 'Data');
16071
+ * if (heading) {
16072
+ * doc.insertAfter(heading, table);
16073
+ * }
16074
+ *
16075
+ * // Split a paragraph and insert content between halves
16076
+ * const tail = para.splitAt(offset);
16077
+ * doc.insertAfter(para, newTable);
16078
+ * doc.insertAfter(newTable, tail);
16079
+ * ```
16080
+ */
16081
+ insertAfter(reference: BodyElement, element: BodyElement): boolean {
16082
+ const index = this.bodyElements.indexOf(reference);
16083
+ if (index === -1) return false;
16084
+ this.bodyElements.splice(index + 1, 0, element);
16085
+ return true;
16086
+ }
16087
+
16088
+ /**
16089
+ * Inserts a body element before a reference element
16090
+ *
16091
+ * Finds the reference element in the body and inserts the new element
16092
+ * immediately before it. Returns false if the reference is not found.
16093
+ *
16094
+ * @param reference - The existing element to insert before
16095
+ * @param element - The element to insert
16096
+ * @returns True if inserted, false if reference not found
16097
+ *
16098
+ * @example
16099
+ * ```typescript
16100
+ * // Insert a heading before a table
16101
+ * const table = doc.getTables()[0];
16102
+ * if (table) {
16103
+ * const heading = new Paragraph().addText('Table 1').setStyle('Heading2');
16104
+ * doc.insertBefore(table, heading);
16105
+ * }
16106
+ * ```
16107
+ */
16108
+ insertBefore(reference: BodyElement, element: BodyElement): boolean {
16109
+ const index = this.bodyElements.indexOf(reference);
16110
+ if (index === -1) return false;
16111
+ this.bodyElements.splice(index, 0, element);
16112
+ return true;
16113
+ }
16114
+
16115
+ /**
16116
+ * Replaces a body element with another
16117
+ *
16118
+ * Finds the old element in the body and replaces it in-place with the
16119
+ * new element. The new element occupies the same position. Returns false
16120
+ * if the old element is not found.
16121
+ *
16122
+ * @param oldElement - The element to replace
16123
+ * @param newElement - The replacement element
16124
+ * @returns True if replaced, false if old element not found
16125
+ *
16126
+ * @example
16127
+ * ```typescript
16128
+ * // Replace a placeholder paragraph with a table
16129
+ * const placeholder = doc.getParagraphs().find(
16130
+ * p => p.getText() === '{{INSERT_TABLE_HERE}}'
16131
+ * );
16132
+ * if (placeholder) {
16133
+ * doc.replaceElement(placeholder, dataTable);
16134
+ * }
16135
+ * ```
16136
+ */
16137
+ replaceElement(oldElement: BodyElement, newElement: BodyElement): boolean {
16138
+ const index = this.bodyElements.indexOf(oldElement);
16139
+ if (index === -1) return false;
16140
+ this.bodyElements[index] = newElement;
16141
+ return true;
16142
+ }
16143
+
14457
16144
  /**
14458
16145
  * Inserts a body element at a specific index, shifting existing elements forward.
14459
16146
  * @param index - The zero-based index at which to insert. Clamped to valid range.