@usejunior/docx-core 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +86 -28
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/atomizer.d.ts +218 -0
  5. package/dist/atomizer.d.ts.map +1 -0
  6. package/dist/atomizer.js +856 -0
  7. package/dist/atomizer.js.map +1 -0
  8. package/dist/baselines/atomizer/atomLcs.d.ts +96 -0
  9. package/dist/baselines/atomizer/atomLcs.d.ts.map +1 -0
  10. package/dist/baselines/atomizer/atomLcs.js +347 -0
  11. package/dist/baselines/atomizer/atomLcs.js.map +1 -0
  12. package/dist/baselines/atomizer/debug.d.ts +41 -0
  13. package/dist/baselines/atomizer/debug.d.ts.map +1 -0
  14. package/dist/baselines/atomizer/debug.js +85 -0
  15. package/dist/baselines/atomizer/debug.js.map +1 -0
  16. package/dist/baselines/atomizer/documentReconstructor.d.ts +64 -0
  17. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -0
  18. package/dist/baselines/atomizer/documentReconstructor.js +939 -0
  19. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -0
  20. package/dist/baselines/atomizer/hierarchicalLcs.d.ts +111 -0
  21. package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -0
  22. package/dist/baselines/atomizer/hierarchicalLcs.js +469 -0
  23. package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -0
  24. package/dist/baselines/atomizer/inPlaceModifier.d.ts +183 -0
  25. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -0
  26. package/dist/baselines/atomizer/inPlaceModifier.js +1600 -0
  27. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -0
  28. package/dist/baselines/atomizer/numberingIntegration.d.ts +59 -0
  29. package/dist/baselines/atomizer/numberingIntegration.d.ts.map +1 -0
  30. package/dist/baselines/atomizer/numberingIntegration.js +209 -0
  31. package/dist/baselines/atomizer/numberingIntegration.js.map +1 -0
  32. package/dist/baselines/atomizer/pipeline.d.ts +65 -0
  33. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -0
  34. package/dist/baselines/atomizer/pipeline.js +510 -0
  35. package/dist/baselines/atomizer/pipeline.js.map +1 -0
  36. package/dist/baselines/atomizer/premergeRuns.d.ts +26 -0
  37. package/dist/baselines/atomizer/premergeRuns.d.ts.map +1 -0
  38. package/dist/baselines/atomizer/premergeRuns.js +150 -0
  39. package/dist/baselines/atomizer/premergeRuns.js.map +1 -0
  40. package/dist/baselines/atomizer/trackChangesAcceptor.d.ts +63 -0
  41. package/dist/baselines/atomizer/trackChangesAcceptor.d.ts.map +1 -0
  42. package/dist/baselines/atomizer/trackChangesAcceptor.js +254 -0
  43. package/dist/baselines/atomizer/trackChangesAcceptor.js.map +1 -0
  44. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts +64 -0
  45. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -0
  46. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +586 -0
  47. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -0
  48. package/dist/baselines/atomizer/xmlToWmlElement.d.ts +65 -0
  49. package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -0
  50. package/dist/baselines/atomizer/xmlToWmlElement.js +95 -0
  51. package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -0
  52. package/dist/baselines/diffmatch/documentBuilder.d.ts +44 -0
  53. package/dist/baselines/diffmatch/documentBuilder.d.ts.map +1 -0
  54. package/dist/baselines/diffmatch/documentBuilder.js +227 -0
  55. package/dist/baselines/diffmatch/documentBuilder.js.map +1 -0
  56. package/dist/baselines/diffmatch/paragraphAlignment.d.ts +75 -0
  57. package/dist/baselines/diffmatch/paragraphAlignment.d.ts.map +1 -0
  58. package/dist/baselines/diffmatch/paragraphAlignment.js +206 -0
  59. package/dist/baselines/diffmatch/paragraphAlignment.js.map +1 -0
  60. package/dist/baselines/diffmatch/pipeline.d.ts +33 -0
  61. package/dist/baselines/diffmatch/pipeline.d.ts.map +1 -0
  62. package/dist/baselines/diffmatch/pipeline.js +84 -0
  63. package/dist/baselines/diffmatch/pipeline.js.map +1 -0
  64. package/dist/baselines/diffmatch/runDiff.d.ts +53 -0
  65. package/dist/baselines/diffmatch/runDiff.d.ts.map +1 -0
  66. package/dist/baselines/diffmatch/runDiff.js +253 -0
  67. package/dist/baselines/diffmatch/runDiff.js.map +1 -0
  68. package/dist/baselines/diffmatch/trackChangesRenderer.d.ts +64 -0
  69. package/dist/baselines/diffmatch/trackChangesRenderer.d.ts.map +1 -0
  70. package/dist/baselines/diffmatch/trackChangesRenderer.js +178 -0
  71. package/dist/baselines/diffmatch/trackChangesRenderer.js.map +1 -0
  72. package/dist/baselines/diffmatch/xmlParser.d.ts +45 -0
  73. package/dist/baselines/diffmatch/xmlParser.d.ts.map +1 -0
  74. package/dist/baselines/diffmatch/xmlParser.js +344 -0
  75. package/dist/baselines/diffmatch/xmlParser.js.map +1 -0
  76. package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts +51 -0
  77. package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts.map +1 -0
  78. package/dist/baselines/wmlcomparer/DocxodusWasm.js +83 -0
  79. package/dist/baselines/wmlcomparer/DocxodusWasm.js.map +1 -0
  80. package/dist/baselines/wmlcomparer/DotnetCli.d.ts +40 -0
  81. package/dist/baselines/wmlcomparer/DotnetCli.d.ts.map +1 -0
  82. package/dist/baselines/wmlcomparer/DotnetCli.js +135 -0
  83. package/dist/baselines/wmlcomparer/DotnetCli.js.map +1 -0
  84. package/dist/benchmark/metrics.d.ts +72 -0
  85. package/dist/benchmark/metrics.d.ts.map +1 -0
  86. package/dist/benchmark/metrics.js +45 -0
  87. package/dist/benchmark/metrics.js.map +1 -0
  88. package/dist/benchmark/reporter.d.ts +23 -0
  89. package/dist/benchmark/reporter.d.ts.map +1 -0
  90. package/dist/benchmark/reporter.js +147 -0
  91. package/dist/benchmark/reporter.js.map +1 -0
  92. package/dist/benchmark/runner.d.ts +30 -0
  93. package/dist/benchmark/runner.d.ts.map +1 -0
  94. package/dist/benchmark/runner.js +233 -0
  95. package/dist/benchmark/runner.js.map +1 -0
  96. package/dist/cli/compare-two.d.ts +28 -0
  97. package/dist/cli/compare-two.d.ts.map +1 -0
  98. package/dist/cli/compare-two.js +110 -0
  99. package/dist/cli/compare-two.js.map +1 -0
  100. package/dist/cli/index.d.ts +3 -0
  101. package/dist/cli/index.d.ts.map +1 -0
  102. package/dist/cli/index.js +21 -0
  103. package/dist/cli/index.js.map +1 -0
  104. package/dist/core-types.d.ts +296 -0
  105. package/dist/core-types.d.ts.map +1 -0
  106. package/dist/core-types.js +122 -0
  107. package/dist/core-types.js.map +1 -0
  108. package/dist/footnotes.d.ts +144 -0
  109. package/dist/footnotes.d.ts.map +1 -0
  110. package/dist/footnotes.js +291 -0
  111. package/dist/footnotes.js.map +1 -0
  112. package/dist/format-detection.d.ts +120 -0
  113. package/dist/format-detection.d.ts.map +1 -0
  114. package/dist/format-detection.js +338 -0
  115. package/dist/format-detection.js.map +1 -0
  116. package/dist/index.d.ts +177 -0
  117. package/dist/index.d.ts.map +1 -0
  118. package/dist/index.js +55 -0
  119. package/dist/index.js.map +1 -0
  120. package/dist/integration/output-artifacts.d.ts +6 -0
  121. package/dist/integration/output-artifacts.d.ts.map +1 -0
  122. package/dist/integration/output-artifacts.js +30 -0
  123. package/dist/integration/output-artifacts.js.map +1 -0
  124. package/dist/move-detection.d.ts +211 -0
  125. package/dist/move-detection.d.ts.map +1 -0
  126. package/dist/move-detection.js +391 -0
  127. package/dist/move-detection.js.map +1 -0
  128. package/dist/numbering.d.ts +136 -0
  129. package/dist/numbering.d.ts.map +1 -0
  130. package/dist/numbering.js +446 -0
  131. package/dist/numbering.js.map +1 -0
  132. package/dist/primitives/accept_changes.d.ts +30 -0
  133. package/dist/primitives/accept_changes.d.ts.map +1 -0
  134. package/dist/primitives/accept_changes.js +241 -0
  135. package/dist/primitives/accept_changes.js.map +1 -0
  136. package/dist/primitives/bookmarks.d.ts +12 -0
  137. package/dist/primitives/bookmarks.d.ts.map +1 -0
  138. package/dist/primitives/bookmarks.js +248 -0
  139. package/dist/primitives/bookmarks.js.map +1 -0
  140. package/dist/primitives/comments.d.ts +88 -0
  141. package/dist/primitives/comments.d.ts.map +1 -0
  142. package/dist/primitives/comments.js +703 -0
  143. package/dist/primitives/comments.js.map +1 -0
  144. package/dist/primitives/document.d.ts +168 -0
  145. package/dist/primitives/document.d.ts.map +1 -0
  146. package/dist/primitives/document.js +532 -0
  147. package/dist/primitives/document.js.map +1 -0
  148. package/dist/primitives/document_view.d.ts +93 -0
  149. package/dist/primitives/document_view.d.ts.map +1 -0
  150. package/dist/primitives/document_view.js +722 -0
  151. package/dist/primitives/document_view.js.map +1 -0
  152. package/dist/primitives/dom-helpers.d.ts +94 -0
  153. package/dist/primitives/dom-helpers.d.ts.map +1 -0
  154. package/dist/primitives/dom-helpers.js +219 -0
  155. package/dist/primitives/dom-helpers.js.map +1 -0
  156. package/dist/primitives/errors.d.ts +7 -0
  157. package/dist/primitives/errors.d.ts.map +1 -0
  158. package/dist/primitives/errors.js +10 -0
  159. package/dist/primitives/errors.js.map +1 -0
  160. package/dist/primitives/extract_revisions.d.ts +50 -0
  161. package/dist/primitives/extract_revisions.d.ts.map +1 -0
  162. package/dist/primitives/extract_revisions.js +340 -0
  163. package/dist/primitives/extract_revisions.js.map +1 -0
  164. package/dist/primitives/footnotes.d.ts +37 -0
  165. package/dist/primitives/footnotes.d.ts.map +1 -0
  166. package/dist/primitives/footnotes.js +552 -0
  167. package/dist/primitives/footnotes.js.map +1 -0
  168. package/dist/primitives/formatting_tags.d.ts +30 -0
  169. package/dist/primitives/formatting_tags.d.ts.map +1 -0
  170. package/dist/primitives/formatting_tags.js +217 -0
  171. package/dist/primitives/formatting_tags.js.map +1 -0
  172. package/dist/primitives/index.d.ts +26 -0
  173. package/dist/primitives/index.d.ts.map +1 -0
  174. package/dist/primitives/index.js +26 -0
  175. package/dist/primitives/index.js.map +1 -0
  176. package/dist/primitives/layout.d.ts +53 -0
  177. package/dist/primitives/layout.d.ts.map +1 -0
  178. package/dist/primitives/layout.js +178 -0
  179. package/dist/primitives/layout.js.map +1 -0
  180. package/dist/primitives/list_labels.d.ts +19 -0
  181. package/dist/primitives/list_labels.d.ts.map +1 -0
  182. package/dist/primitives/list_labels.js +57 -0
  183. package/dist/primitives/list_labels.js.map +1 -0
  184. package/dist/primitives/matching.d.ts +17 -0
  185. package/dist/primitives/matching.d.ts.map +1 -0
  186. package/dist/primitives/matching.js +144 -0
  187. package/dist/primitives/matching.js.map +1 -0
  188. package/dist/primitives/merge_runs.d.ts +23 -0
  189. package/dist/primitives/merge_runs.d.ts.map +1 -0
  190. package/dist/primitives/merge_runs.js +195 -0
  191. package/dist/primitives/merge_runs.js.map +1 -0
  192. package/dist/primitives/namespaces.d.ts +90 -0
  193. package/dist/primitives/namespaces.d.ts.map +1 -0
  194. package/dist/primitives/namespaces.js +107 -0
  195. package/dist/primitives/namespaces.js.map +1 -0
  196. package/dist/primitives/numbering.d.ts +27 -0
  197. package/dist/primitives/numbering.d.ts.map +1 -0
  198. package/dist/primitives/numbering.js +182 -0
  199. package/dist/primitives/numbering.js.map +1 -0
  200. package/dist/primitives/prevent_double_elevation.d.ts +18 -0
  201. package/dist/primitives/prevent_double_elevation.d.ts.map +1 -0
  202. package/dist/primitives/prevent_double_elevation.js +190 -0
  203. package/dist/primitives/prevent_double_elevation.js.map +1 -0
  204. package/dist/primitives/reject_changes.d.ts +27 -0
  205. package/dist/primitives/reject_changes.d.ts.map +1 -0
  206. package/dist/primitives/reject_changes.js +371 -0
  207. package/dist/primitives/reject_changes.js.map +1 -0
  208. package/dist/primitives/relationships.d.ts +7 -0
  209. package/dist/primitives/relationships.d.ts.map +1 -0
  210. package/dist/primitives/relationships.js +24 -0
  211. package/dist/primitives/relationships.js.map +1 -0
  212. package/dist/primitives/semantic_tags.d.ts +32 -0
  213. package/dist/primitives/semantic_tags.d.ts.map +1 -0
  214. package/dist/primitives/semantic_tags.js +139 -0
  215. package/dist/primitives/semantic_tags.js.map +1 -0
  216. package/dist/primitives/simplify_redlines.d.ts +19 -0
  217. package/dist/primitives/simplify_redlines.d.ts.map +1 -0
  218. package/dist/primitives/simplify_redlines.js +94 -0
  219. package/dist/primitives/simplify_redlines.js.map +1 -0
  220. package/dist/primitives/styles.d.ts +36 -0
  221. package/dist/primitives/styles.d.ts.map +1 -0
  222. package/dist/primitives/styles.js +190 -0
  223. package/dist/primitives/styles.js.map +1 -0
  224. package/dist/primitives/text.d.ts +27 -0
  225. package/dist/primitives/text.d.ts.map +1 -0
  226. package/dist/primitives/text.js +416 -0
  227. package/dist/primitives/text.js.map +1 -0
  228. package/dist/primitives/validate_document.d.ts +24 -0
  229. package/dist/primitives/validate_document.d.ts.map +1 -0
  230. package/dist/primitives/validate_document.js +147 -0
  231. package/dist/primitives/validate_document.js.map +1 -0
  232. package/dist/primitives/xml.d.ts +5 -0
  233. package/dist/primitives/xml.d.ts.map +1 -0
  234. package/dist/primitives/xml.js +19 -0
  235. package/dist/primitives/xml.js.map +1 -0
  236. package/dist/primitives/zip.d.ts +25 -0
  237. package/dist/primitives/zip.d.ts.map +1 -0
  238. package/dist/primitives/zip.js +78 -0
  239. package/dist/primitives/zip.js.map +1 -0
  240. package/dist/shared/docx/DocxArchive.d.ts +94 -0
  241. package/dist/shared/docx/DocxArchive.d.ts.map +1 -0
  242. package/dist/shared/docx/DocxArchive.js +169 -0
  243. package/dist/shared/docx/DocxArchive.js.map +1 -0
  244. package/dist/shared/ooxml/namespaces.d.ts +149 -0
  245. package/dist/shared/ooxml/namespaces.d.ts.map +1 -0
  246. package/dist/shared/ooxml/namespaces.js +224 -0
  247. package/dist/shared/ooxml/namespaces.js.map +1 -0
  248. package/dist/shared/ooxml/types.d.ts +136 -0
  249. package/dist/shared/ooxml/types.d.ts.map +1 -0
  250. package/dist/shared/ooxml/types.js +7 -0
  251. package/dist/shared/ooxml/types.js.map +1 -0
  252. package/package.json +63 -6
@@ -0,0 +1,939 @@
1
+ /**
2
+ * Document Reconstructor
3
+ *
4
+ * Rebuilds document.xml from marked atoms with track changes.
5
+ * Generates w:ins, w:del, w:moveFrom, w:moveTo elements as appropriate.
6
+ */
7
+ import { CorrelationStatus } from '../../core-types.js';
8
+ import { getLeafText, childElements } from '../../primitives/index.js';
9
+ import { serializeToXml, cloneElement } from './xmlToWmlElement.js';
10
+ import { EMPTY_PARAGRAPH_TAG } from '../../atomizer.js';
11
+ import { areRunPropertiesEqual } from '../../format-detection.js';
12
+ import { debug } from './debug.js';
13
/**
 * Build a fresh revision-ID allocation state.
 *
 * @returns {{ nextId: number, moveRangeIds: Map }} State whose counter starts
 *   at 1 and whose move-range registry is empty.
 */
function createRevisionIdState() {
    // Each state owns its own registry so separate states never share move IDs.
    const moveRangeIds = new Map();
    return { nextId: 1, moveRangeIds };
}
22
/**
 * Hand out the next revision ID and advance the counter.
 *
 * @param state - Revision ID state; its `nextId` field is incremented in place.
 * @returns The value `nextId` held before the increment.
 */
function allocateRevisionId(state) {
    const allocated = state.nextId++;
    return allocated;
}
28
/**
 * Look up the pair of range IDs registered for a move name, allocating and
 * registering a new pair on first use.
 *
 * @param state - Revision ID state holding the `moveRangeIds` map.
 * @param moveName - Key identifying the move.
 * @returns Object with `sourceRangeId` and `destRangeId`; the same object is
 *   returned on every later call with the same `moveName`.
 */
function getMoveRangeIds(state, moveName) {
    const known = state.moveRangeIds.get(moveName);
    if (known) {
        return known;
    }
    // Source ID is allocated before the destination ID, so it is the smaller one.
    const sourceRangeId = allocateRevisionId(state);
    const destRangeId = allocateRevisionId(state);
    const fresh = { sourceRangeId, destRangeId };
    state.moveRangeIds.set(moveName, fresh);
    return fresh;
}
42
/**
 * Render a date as an ISO 8601 UTC timestamp without milliseconds.
 *
 * @param date - Date to format.
 * @returns `date.toISOString()` with the trailing `.mmmZ` replaced by `Z`.
 */
function formatDate(date) {
    const isoWithMillis = date.toISOString();
    return isoWithMillis.replace(/\.\d{3}Z$/, 'Z');
}
48
/**
 * Rebuild document.xml from merged atoms, emitting track-changes markup.
 *
 * Steps: group atoms into paragraphs, consolidate adjacent changes, reset the
 * debug counters, build one XML string per paragraph, log counter summaries,
 * then hand the paragraph strings to `buildDocument`.
 *
 * @param mergedAtoms - Atoms with correlation status set
 * @param originalXml - Original document.xml, passed through to `buildDocument`
 * @param options - Reconstruction options; `author` and `date` are read
 * @returns Whatever `buildDocument` produces for the rebuilt paragraphs
 */
export function reconstructDocument(mergedAtoms, originalXml, options) {
    const author = options.author;
    const dateStr = formatDate(options.date);
    const revState = createRevisionIdState();
    // Paragraph grouping first, then del/ins consolidation inside each paragraph.
    const paragraphGroups = consolidateAdjacentChanges(groupAtomsByParagraph(mergedAtoms));
    // Counters must be cleared before any paragraph is built so the summaries
    // logged below describe this call only.
    resetDebugCounters();
    resetEmptyParagraphCounters();
    debug('reconstructor', `${mergedAtoms.length} atoms -> ${paragraphGroups.length} paragraphs`);
    // All paragraphs share one revision state so IDs are allocated in sequence.
    const paragraphXmls = paragraphGroups.map((group) => buildParagraphXml(group, author, dateStr, revState));
    const runCounters = getDebugCounters();
    debug('reconstructor', `buildRunContent processed: ${runCounters.atoms} atoms, ${runCounters.wt} w:t elements`);
    const { inserted, deleted, equal, other } = getEmptyParagraphCounters();
    debug('reconstructor', `Empty paragraphs: inserted=${inserted}, deleted=${deleted}, equal=${equal}, other=${other}`);
    return buildDocument(originalXml, paragraphXmls);
}
81
/**
 * Group atoms into paragraph groups, each holding an ordered list of run groups.
 *
 * Atoms are first sorted by `paragraphIndex` (atoms without one sort last) so
 * that every paragraph's atoms are contiguous, then walked once. Paragraph
 * boundaries are decided by `shouldStartNewParagraph` and run-group boundaries
 * by `shouldStartNewRunGroup`.
 *
 * The `w:p` / `w:r` ancestor lookups are only performed when a new paragraph
 * or run group is actually created, because that is the only place their
 * results are used.
 *
 * @param atoms - Atoms with correlation status set; the input array is not mutated.
 * @returns Array of `{ pPr, runGroups }` paragraph groups. `pPr` is a clone of
 *   the first atom's paragraph properties (or null); each run group is
 *   `{ status, atoms, rPr, moveName }` with `rPr` cloned (or null).
 */
function groupAtomsByParagraph(atoms) {
    const groups = [];
    let currentGroup = null;
    let currentRunGroup = null;
    const uniqueIndices = new Set(atoms.map((a) => a.paragraphIndex));
    debug('reconstructor', `groupAtomsByParagraph: ${atoms.length} atoms, ${uniqueIndices.size} unique paragraphIndices`);
    // Sort a copy by paragraphIndex so atoms sharing an index are contiguous.
    // Array.prototype.sort is stable, so relative order inside a paragraph
    // (deleted before inserted) is preserved.
    const sortKey = (atom) => atom.paragraphIndex ?? Number.MAX_SAFE_INTEGER;
    const sortedAtoms = [...atoms].sort((a, b) => sortKey(a) - sortKey(b));
    for (const atom of sortedAtoms) {
        // Paragraph boundary: compared via paragraphIndex, not object identity.
        if (!currentGroup || shouldStartNewParagraph(currentGroup, currentRunGroup, atom)) {
            if (currentRunGroup && currentGroup) {
                currentGroup.runGroups.push(currentRunGroup);
            }
            currentRunGroup = null;
            // Paragraph properties come from the first atom of the paragraph.
            const pAncestor = findAncestorByTag(atom, 'w:p');
            const pPr = pAncestor ? findChildByTag(pAncestor, 'w:pPr') : null;
            currentGroup = {
                pPr: pPr ? cloneElement(pPr) : null,
                runGroups: [],
            };
            groups.push(currentGroup);
        }
        if (!currentRunGroup || shouldStartNewRunGroup(currentRunGroup, atom)) {
            if (currentRunGroup) {
                currentGroup.runGroups.push(currentRunGroup);
            }
            // Prefer the atom's own rPr field; fall back to the enclosing w:r
            // for atoms that do not carry one.
            let rPr = getEffectiveAtomRPr(atom);
            if (rPr === null || rPr === undefined) {
                const rAncestor = findAncestorByTag(atom, 'w:r');
                rPr = rAncestor ? findChildByTag(rAncestor, 'w:rPr') : null;
            }
            currentRunGroup = {
                status: atom.correlationStatus,
                atoms: [atom],
                rPr: rPr ? cloneElement(rPr) : null,
                moveName: atom.moveName,
            };
        }
        else {
            currentRunGroup.atoms.push(atom);
        }
    }
    // Flush the run group still being built when the loop ends.
    if (currentRunGroup && currentGroup) {
        currentGroup.runGroups.push(currentRunGroup);
    }
    return groups;
}
145
/**
 * Tell whether every atom in a run group has leaf text that trims to the
 * empty string. Atoms with no leaf text count as whitespace.
 *
 * @param group - Run group whose `atoms` are inspected.
 * @returns True when no atom carries non-whitespace text (also true for an
 *   empty atom list).
 */
function isWhitespaceOnlyGroup(group) {
    const carriesVisibleText = (atom) => {
        const leafText = getLeafText(atom.contentElement) ?? '';
        return leafText.trim() !== '';
    };
    return !group.atoms.some(carriesVisibleText);
}
154
/**
 * Reorder run groups inside each paragraph so that every change block lists
 * all of its deletions first and all of its insertions second.
 *
 * A change block starts at a Deleted or Inserted run group and extends over
 * following Deleted, Inserted, and whitespace-only Equal run groups. Atoms of
 * whitespace-only Equal groups are shallow-copied into both the deletion and
 * the insertion list (with the matching status) so the whitespace survives
 * whether the changes are accepted or rejected.
 *
 * @param groups - Paragraph groups; each group's `runGroups` is replaced in place.
 * @returns The same `groups` array.
 */
function reorderChangeBlocks(groups) {
    const isChangeStatus = (status) => status === CorrelationStatus.Deleted ||
        status === CorrelationStatus.Inserted;
    for (const paraGroup of groups) {
        const source = paraGroup.runGroups;
        const reordered = [];
        let cursor = 0;
        while (cursor < source.length) {
            const head = source[cursor];
            if (!isChangeStatus(head.status)) {
                // Outside a change block: pass the run group through untouched.
                reordered.push(head);
                cursor += 1;
                continue;
            }
            const deletedAtoms = [];
            const insertedAtoms = [];
            // Consume the block; `break` leaves `cursor` on the first run group
            // that does not belong to it.
            for (; cursor < source.length; cursor += 1) {
                const member = source[cursor];
                if (member.status === CorrelationStatus.Deleted) {
                    deletedAtoms.push(...member.atoms);
                }
                else if (member.status === CorrelationStatus.Inserted) {
                    insertedAtoms.push(...member.atoms);
                }
                else if (member.status === CorrelationStatus.Equal && isWhitespaceOnlyGroup(member)) {
                    for (const atom of member.atoms) {
                        deletedAtoms.push({ ...atom, correlationStatus: CorrelationStatus.Deleted });
                        insertedAtoms.push({ ...atom, correlationStatus: CorrelationStatus.Inserted });
                    }
                }
                else {
                    break;
                }
            }
            // Merged groups carry rPr: null because their atoms may come from
            // run groups with different run properties.
            if (deletedAtoms.length > 0) {
                reordered.push({
                    status: CorrelationStatus.Deleted,
                    atoms: deletedAtoms,
                    rPr: null,
                });
            }
            if (insertedAtoms.length > 0) {
                reordered.push({
                    status: CorrelationStatus.Inserted,
                    atoms: insertedAtoms,
                    rPr: null,
                });
            }
        }
        paraGroup.runGroups = reordered;
    }
    return groups;
}
235
/**
 * Consolidate neighbouring changes inside each paragraph for readability.
 *
 * Delegates to `reorderChangeBlocks`, which turns word-level interleaving such
 * as "deleted word1, inserted word2, space, deleted word3, inserted word4"
 * into one deletion run ("word1 word3") followed by one insertion run
 * ("word2 word4").
 *
 * @param groups - Paragraph groups; modified in place by the delegate.
 * @returns The same `groups` array.
 */
function consolidateAdjacentChanges(groups) {
    const consolidated = reorderChangeBlocks(groups);
    return consolidated;
}
251
/**
 * Return the last entry of `atom.ancestorElements` whose tag name matches.
 *
 * The list is scanned from its end towards its start, so when several
 * ancestors share the tag, the one with the highest index wins.
 *
 * @param atom - Atom carrying an `ancestorElements` array.
 * @param tagName - Qualified tag name to look for (e.g. 'w:p').
 * @returns The matching ancestor element, or null when there is none.
 */
function findAncestorByTag(atom, tagName) {
    const chain = atom.ancestorElements;
    let idx = chain.length;
    while (idx-- > 0) {
        const candidate = chain[idx];
        if (candidate.tagName === tagName) {
            return candidate;
        }
    }
    return null;
}
262
/**
 * Return the first direct child element with the given tag name.
 *
 * Only nodes whose `nodeType` is 1 (element nodes) are considered; other
 * child nodes are skipped even if they expose a matching `tagName`.
 *
 * @param element - Parent node exposing an indexable `childNodes` list.
 * @param tagName - Qualified tag name to look for (e.g. 'w:pPr').
 * @returns The first matching child, or null when there is none.
 */
function findChildByTag(element, tagName) {
    const ELEMENT_NODE = 1;
    const { childNodes } = element;
    // Index access only: the list is not assumed to be iterable.
    for (let pos = 0; pos < childNodes.length; pos += 1) {
        const node = childNodes[pos];
        if (node.nodeType !== ELEMENT_NODE) {
            continue;
        }
        if (node.tagName === tagName) {
            return node;
        }
    }
    return null;
}
274
/**
 * Decide whether the atom being processed begins a new paragraph.
 *
 * The decision compares `paragraphIndex` values rather than element
 * references, because atoms from the original and the revised document
 * belong to different trees.
 *
 * The reference index is taken from the last atom of `currentRunGroup` (which
 * may not have been pushed into `currentGroup` yet); if that yields no index,
 * the last atom of the last run group already in `currentGroup` is used.
 * Whenever either side has no index, the atom stays in the current paragraph.
 *
 * @param currentGroup - The current paragraph group being built
 * @param currentRunGroup - The run group in progress, or null
 * @param currentAtom - The atom being processed
 * @returns True only when both indices are defined and differ
 */
function shouldStartNewParagraph(currentGroup, currentRunGroup, currentAtom) {
    const incomingIndex = currentAtom.paragraphIndex;
    if (incomingIndex === undefined) {
        return false;
    }
    // paragraphIndex of a run group's final atom; undefined when the group is
    // missing, has no atoms, or that atom carries no index.
    const trailingIndexOf = (runGroup) => {
        if (!runGroup || runGroup.atoms.length === 0) {
            return undefined;
        }
        return runGroup.atoms[runGroup.atoms.length - 1].paragraphIndex;
    };
    const pendingIndex = trailingIndexOf(currentRunGroup);
    if (pendingIndex !== undefined) {
        return incomingIndex !== pendingIndex;
    }
    const committed = currentGroup.runGroups;
    const committedIndex = committed.length === 0
        ? undefined
        : trailingIndexOf(committed[committed.length - 1]);
    return committedIndex !== undefined && incomingIndex !== committedIndex;
}
315
/**
 * Read the run properties stored directly on an atom.
 *
 * @param atom - Atom that may carry an `rPr` field.
 * @returns The atom's `rPr`, or null when the field is null or undefined.
 */
function getEffectiveAtomRPr(atom) {
    const { rPr } = atom;
    if (rPr === undefined || rPr === null) {
        return null;
    }
    return rPr;
}
322
/**
 * Decide whether an atom must open a new run group instead of joining the
 * current one.
 *
 * A new group starts when the correlation status or the move name differs.
 * For MovedSource / MovedDestination groups that is the only criterion:
 * they are never split on formatting, to avoid duplicate move range markers
 * (moveFromRangeStart/End). For all other statuses a new group also starts
 * when the atom's run properties differ from those of the group's last atom,
 * which prevents formatting bleed between runs.
 *
 * @param currentGroup - Run group being built (`status`, `moveName`, `atoms`).
 * @param atom - Atom about to be appended.
 * @returns True when the atom should start a new run group.
 */
function shouldStartNewRunGroup(currentGroup, atom) {
    if (currentGroup.status !== atom.correlationStatus) {
        return true;
    }
    if (currentGroup.moveName !== atom.moveName) {
        return true;
    }
    if (currentGroup.status === CorrelationStatus.MovedSource ||
        currentGroup.status === CorrelationStatus.MovedDestination) {
        return false;
    }
    const currentRPr = getEffectiveAtomRPr(currentGroup.atoms[currentGroup.atoms.length - 1]);
    const newRPr = getEffectiveAtomRPr(atom);
    // Fast path: same object, or both null. A separate "both null" check would
    // be unreachable because strict equality already covers null === null.
    if (currentRPr === newRPr) {
        return false;
    }
    return !areRunPropertiesEqual(currentRPr, newRPr);
}
350
/**
 * Tell whether a paragraph group is an empty paragraph carrying one status.
 *
 * @param group - The paragraph group to check
 * @param status - The correlation status every atom must have
 * @returns True when the group has at least one run group and every atom in
 *   every run group is an empty-paragraph marker with the given status
 */
function isEmptyParagraphWithStatus(group, status) {
    const isMarker = (atom) => atom.contentElement.tagName === EMPTY_PARAGRAPH_TAG;
    const hasStatus = (atom) => atom.correlationStatus === status;
    // Per run group, the marker test runs over all atoms before the status
    // test, matching the order in which the two conditions are evaluated.
    const qualifies = (runGroup) => runGroup.atoms.every(isMarker) && runGroup.atoms.every(hasStatus);
    return group.runGroups.length > 0 && group.runGroups.every(qualifies);
}
374
// Module-level tallies of empty paragraphs by correlation status (debug only).
let debugEmptyParaInserted = 0;
let debugEmptyParaDeleted = 0;
let debugEmptyParaEqual = 0;
let debugEmptyParaOther = 0;
/**
 * Zero all four empty-paragraph debug tallies.
 */
export function resetEmptyParagraphCounters() {
    debugEmptyParaOther = 0;
    debugEmptyParaEqual = 0;
    debugEmptyParaDeleted = 0;
    debugEmptyParaInserted = 0;
}
388
/**
 * Snapshot the empty-paragraph debug tallies.
 *
 * @returns A new object `{ inserted, deleted, equal, other }` holding the
 *   current counter values.
 */
export function getEmptyParagraphCounters() {
    const inserted = debugEmptyParaInserted;
    const deleted = debugEmptyParaDeleted;
    const equal = debugEmptyParaEqual;
    const other = debugEmptyParaOther;
    return { inserted, deleted, equal, other };
}
399
/**
 * Tell whether a paragraph group consists solely of empty-paragraph marker
 * atoms, regardless of their correlation status.
 *
 * @param group - The paragraph group to check
 * @returns True when the group has at least one run group and every atom's
 *   content element is an empty-paragraph marker
 */
function isEmptyParagraphGroup(group) {
    const { runGroups } = group;
    const onlyMarkers = (runGroup) => runGroup.atoms.every((atom) => atom.contentElement.tagName === EMPTY_PARAGRAPH_TAG);
    return runGroups.length > 0 && runGroups.every(onlyMarkers);
}
411
+ /**
412
+ * Build XML for a single paragraph with track changes.
413
+ */
414
/**
 * Build the <w:p> XML for one paragraph group, encoding tracked changes.
 *
 * Paragraph-level insertions/deletions are always expressed with a revision
 * marker inside the paragraph-mark run properties (<w:pPr><w:rPr>...), never by
 * wrapping <w:p> in <w:ins>/<w:del>: those elements are not containers for <w:p>
 * in WordprocessingML.
 *
 * @param {object} group - Paragraph group; `group.pPr` and `group.runGroups` are read.
 * @param {string} author - Revision author (XML-escaped before being written).
 * @param {string} dateStr - Revision date, written verbatim into w:date.
 * @param {object} revState - State object handed to allocateRevisionId.
 * @returns {string} Serialized paragraph XML.
 */
function buildParagraphXml(group, author, dateStr, revState) {
    // Track empty paragraph statuses for debugging (no effect on the output XML).
    if (isEmptyParagraphGroup(group)) {
        const status = group.runGroups[0]?.atoms[0]?.correlationStatus;
        if (status === CorrelationStatus.Inserted) {
            debugEmptyParaInserted++;
        }
        else if (status === CorrelationStatus.Deleted) {
            debugEmptyParaDeleted++;
        }
        else if (status === CorrelationStatus.Equal) {
            debugEmptyParaEqual++;
        }
        else {
            debugEmptyParaOther++;
        }
        // Debug: log the first few empty paragraphs for investigation
        const debugLimit = 5;
        const totalEmpty = debugEmptyParaInserted + debugEmptyParaDeleted + debugEmptyParaEqual + debugEmptyParaOther;
        if (totalEmpty <= debugLimit) {
            const atoms = group.runGroups.flatMap(rg => rg.atoms);
            const statuses = atoms.map(a => a.correlationStatus).join(', ');
            debug('reconstructor', `Empty paragraph #${totalEmpty}: status=${status}, atomCount=${atoms.length}, atomStatuses=[${statuses}]`);
        }
    }
    // Paragraph-mark marker, e.g. <w:ins w:id=".." w:author=".." w:date=".." />.
    const paragraphMarker = (tag, id) => `<${tag} w:id="${id}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}" />`;
    // Whole-paragraph insert/delete encoding must match Word/Aspose behavior:
    //   <w:p>
    //     <w:pPr><w:rPr><w:ins .../></w:rPr></w:pPr>
    //     <w:ins ...><w:r>...</w:r></w:ins>
    //   </w:p>
    // That structure both renders in Word and allows Reject All to remove the
    // paragraph entirely (instead of leaving behind a stub <w:p> break).
    const buildWholeParagraphRevision = (tag) => {
        // Allocation order (paragraph mark first, then run wrapper) is preserved.
        const paraId = allocateRevisionId(revState);
        const runId = allocateRevisionId(revState);
        const parts = [];
        parts.push('<w:p>');
        parts.push(serializePPrWithParaRevisionMarker(group.pPr, paragraphMarker(tag, paraId)));
        parts.push(`<${tag} w:id="${runId}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`);
        for (const runGroup of group.runGroups) {
            const plainRun = buildRunContentAsPlainRun(runGroup);
            // Deleted text must be carried by <w:delText> rather than <w:t>.
            parts.push(tag === 'w:del'
                ? plainRun.replace(/<w:t([^>]*)>([^<]*)<\/w:t>/g, '<w:delText$1>$2</w:delText>')
                : plainRun);
        }
        parts.push(`</${tag}>`);
        parts.push('</w:p>');
        return parts.join('');
    };
    // Empty inserted/deleted paragraph: only the paragraph mark is revised, so the
    // marker in <w:pPr><w:rPr> is the entire encoding. (Previously this wrapped
    // <w:p> in <w:ins>/<w:del>, which is not valid WordprocessingML.)
    const buildEmptyParagraphRevision = (tag) => {
        const id = allocateRevisionId(revState);
        return `<w:p>${serializePPrWithParaRevisionMarker(group.pPr, paragraphMarker(tag, id))}</w:p>`;
    };
    if (isEntireParagraphWithStatus(group, CorrelationStatus.Inserted)) {
        return buildWholeParagraphRevision('w:ins');
    }
    if (isEntireParagraphWithStatus(group, CorrelationStatus.Deleted)) {
        return buildWholeParagraphRevision('w:del');
    }
    if (isEmptyParagraphWithStatus(group, CorrelationStatus.Inserted)) {
        return buildEmptyParagraphRevision('w:ins');
    }
    if (isEmptyParagraphWithStatus(group, CorrelationStatus.Deleted)) {
        return buildEmptyParagraphRevision('w:del');
    }
    // Mixed or unchanged paragraph: plain <w:p> with per-run-group track changes.
    const parts = [];
    parts.push('<w:p>');
    if (group.pPr) {
        parts.push(serializeToXml(group.pPr));
    }
    for (const runGroup of group.runGroups) {
        parts.push(buildRunGroupXml(runGroup, author, dateStr, revState));
    }
    parts.push('</w:p>');
    return parts.join('');
}
519
/**
 * Serialize paragraph properties and ensure a paragraph-level revision marker exists.
 *
 * The marker becomes the first child of the paragraph-mark <w:rPr>. If pPr is
 * missing, or serializes to a self-closing <w:pPr/>, a minimal pPr/rPr is
 * synthesized around the marker.
 *
 * All insertions use slicing or function replacers: markerXml embeds the author
 * name, and a plain-string replacement would expand `$&`, `$1`, `$'` sequences.
 *
 * @param {*} pPr - Paragraph properties node accepted by serializeToXml, or a falsy value.
 * @param {string} markerXml - Already-serialized revision marker (e.g. `<w:ins ... />`).
 * @returns {string} Serialized <w:pPr> containing the marker.
 */
function serializePPrWithParaRevisionMarker(pPr, markerXml) {
    const markedRPr = `<w:rPr>${markerXml}</w:rPr>`;
    // Common case: no paragraph properties. Create minimal pPr/rPr.
    if (!pPr) {
        return `<w:pPr>${markedRPr}</w:pPr>`;
    }
    const xml = serializeToXml(pPr);
    // Self-closing <w:pPr/> form. Anchored at the start so that a nested
    // <w:pPr/> inside <w:pPrChange> is never mistaken for the outer element.
    const selfClosingPPr = /^(\s*)<w:pPr\b([^>]*)\/>/;
    if (selfClosingPPr.test(xml)) {
        return xml.replace(selfClosingPPr, (_match, lead, attrs) => `${lead}<w:pPr${attrs}>${markedRPr}</w:pPr>`);
    }
    // Existing rPr: inject the marker as its first child. `\b` keeps this from
    // matching <w:rPrChange>; a self-closing <w:rPr/> is expanded so the marker
    // ends up inside it rather than after it.
    const rPrOpenTag = /<w:rPr\b([^>]*?)(\/?)>/;
    if (rPrOpenTag.test(xml)) {
        return xml.replace(rPrOpenTag, (_match, attrs, selfClosing) => (selfClosing
            ? `<w:rPr${attrs}>${markerXml}</w:rPr>`
            : `<w:rPr${attrs}>${markerXml}`));
    }
    // No rPr yet: add one. In <w:pPr> the rPr child precedes <w:sectPr> and
    // <w:pPrChange>, so insert before the first of those when present; otherwise
    // before the LAST </w:pPr> (the first one may close a pPr nested in pPrChange).
    const trailingChild = xml.search(/<w:(?:sectPr|pPrChange)\b/);
    const insertAt = trailingChild !== -1 ? trailingChild : xml.lastIndexOf('</w:pPr>');
    if (insertAt === -1) {
        // Unrecognized serialization: leave it untouched, as before.
        return xml;
    }
    return xml.slice(0, insertAt) + markedRPr + xml.slice(insertAt);
}
541
/**
 * Decide whether a paragraph should be treated as wholly `status`.
 *
 * EMPTY_PARAGRAPH_TAG markers are skipped. Atoms that carry a different status
 * are tolerated only when they are "noise": whitespace-only text, tabs, breaks
 * or carriage returns. Such atoms can end up marked Equal through normalization
 * or LCS alignment, and would otherwise block the whole-paragraph wrap and leave
 * an empty <w:p> stub on Reject All.
 *
 * @param {object} group - Paragraph group whose `runGroups[].atoms` are inspected.
 * @param {*} status - Correlation status to test for.
 * @returns {boolean} True when there is content, at least one atom has `status`,
 *   and every other content atom is noise.
 */
function isEntireParagraphWithStatus(group, status) {
    const NOISE_TAGS = ['w:tab', 'w:br', 'w:cr'];
    const contentAtoms = group.runGroups
        .flatMap((runGroup) => runGroup.atoms)
        .filter((atom) => atom.contentElement.tagName !== EMPTY_PARAGRAPH_TAG);
    let matchingAtoms = 0;
    for (const atom of contentAtoms) {
        const element = atom.contentElement;
        const isBlankText = element.tagName === 'w:t' && (getLeafText(element) ?? '').trim() === '';
        if (atom.correlationStatus === status) {
            matchingAtoms += 1;
            continue;
        }
        const isNoise = isBlankText || NOISE_TAGS.includes(element.tagName);
        if (!isNoise) {
            return false;
        }
    }
    // No content at all is left to the empty-paragraph handlers; noise-only
    // paragraphs (no atom with the target status) must not be wrapped either.
    return contentAtoms.length > 0 && matchingAtoms > 0;
}
574
/**
 * Render a run group as bare <w:r> elements, with no track-change wrapper.
 * Used when the enclosing paragraph already carries the <w:ins>/<w:del> wrapper.
 *
 * A non-null group.rPr yields a single run with that rPr. A null group.rPr
 * splits the atoms by their per-atom rPr so formatting does not bleed between them.
 *
 * @param {object} group - Run group; `group.atoms` and `group.rPr` are read.
 * @returns {string} Run XML, or '' when the group holds only empty-paragraph markers.
 */
function buildRunContentAsPlainRun(group) {
    const realAtoms = group.atoms.filter(({ contentElement }) => contentElement.tagName !== EMPTY_PARAGRAPH_TAG);
    if (realAtoms.length === 0) {
        return '';
    }
    if (group.rPr === null) {
        // No group-level rPr: one run per contiguous per-atom rPr.
        let xml = '';
        for (const { atoms, rPr } of subGroupByRPr(realAtoms)) {
            xml += buildSingleRun(atoms, rPr);
        }
        return xml;
    }
    // Explicit group rPr: a single run (buildSingleRun drops the markers itself).
    return buildSingleRun(group.atoms, group.rPr);
}
592
/**
 * Render a run group and wrap it in the track-change element matching its status.
 *
 * @param {object} group - Run group; `group.status` selects the wrapper and
 *   `group.moveName` names the move (falling back to 'move1').
 * @param {string} author - Revision author.
 * @param {string} dateStr - Revision date.
 * @param {object} revState - State object handed on to the wrapper helpers.
 * @returns {string} Wrapped run XML, or '' when the group renders no content
 *   (so that no empty track-change wrapper is produced).
 */
function buildRunGroupXml(group, author, dateStr, revState) {
    const runXml = buildRunContent(group);
    if (!runXml) {
        return '';
    }
    const { status } = group;
    if (status === CorrelationStatus.Equal || status === CorrelationStatus.Unknown) {
        return runXml;
    }
    if (status === CorrelationStatus.Inserted) {
        return wrapWithIns(runXml, author, dateStr, revState);
    }
    if (status === CorrelationStatus.Deleted) {
        return wrapWithDel(runXml, author, dateStr, revState);
    }
    if (status === CorrelationStatus.MovedSource) {
        return wrapWithMoveFrom(runXml, author, dateStr, group.moveName || 'move1', revState);
    }
    if (status === CorrelationStatus.MovedDestination) {
        return wrapWithMoveTo(runXml, author, dateStr, group.moveName || 'move1', revState);
    }
    if (status === CorrelationStatus.FormatChanged) {
        // Format changes rebuild the run so it can carry an rPrChange.
        return buildFormatChangeRun(group, author, dateStr, revState);
    }
    // Any other status is emitted unwrapped.
    return runXml;
}
621
// Debug counters: debugAtomCounter is bumped per atom in buildSingleRun,
// debugWtCounter per <w:t> emitted by serializeAtomElement.
let debugAtomCounter = 0;
let debugWtCounter = 0;
/**
 * Zero both debug counters (for testing).
 */
export function resetDebugCounters() {
    debugWtCounter = 0;
    debugAtomCounter = 0;
}
/**
 * Read the current debug counters (for testing).
 *
 * @returns {{atoms: number, wt: number}} Snapshot of the two counters.
 */
export function getDebugCounters() {
    const snapshot = {
        atoms: debugAtomCounter,
        wt: debugWtCounter,
    };
    return snapshot;
}
637
/**
 * Partition atoms into contiguous sub-groups that share the same effective rPr.
 * A change in rPr (by reference, then by areRunPropertiesEqual) opens a new
 * sub-group; input order is preserved.
 *
 * @param {Array<object>} atoms - Atoms to partition.
 * @returns {Array<{rPr: *, atoms: Array<object>}>} Sub-groups in input order;
 *   empty for empty input.
 */
function subGroupByRPr(atoms) {
    const subGroups = [];
    for (const atom of atoms) {
        const rPr = getEffectiveAtomRPr(atom);
        const open = subGroups[subGroups.length - 1];
        // Identity (which also covers null === null) short-circuits the deep compare.
        const continuesOpenGroup = open !== undefined
            && (open.rPr === rPr || areRunPropertiesEqual(open.rPr, rPr));
        if (continuesOpenGroup) {
            open.atoms.push(atom);
        }
        else {
            subGroups.push({ rPr, atoms: [atom] });
        }
    }
    return subGroups;
}
669
/**
 * Emit one <w:r> for the given atoms, using `rPr` as the run properties.
 *
 * Consecutive w:t atoms are coalesced into a single <w:t>; atoms carrying
 * collapsedFieldAtoms are expanded into their field atoms; every other element
 * goes through serializeAtomElement. debugAtomCounter is bumped once per
 * content atom.
 *
 * @param {Array<object>} atoms - Atoms for the run (empty-paragraph markers are dropped).
 * @param {*} rPr - Run properties node, or a falsy value for none.
 * @returns {string} Run XML, or '' when no content atoms remain.
 */
function buildSingleRun(atoms, rPr) {
    const visibleAtoms = atoms.filter(({ contentElement }) => contentElement.tagName !== EMPTY_PARAGRAPH_TAG);
    if (visibleAtoms.length === 0) {
        return '';
    }
    const chunks = ['<w:r>'];
    if (rPr) {
        chunks.push(serializeToXml(rPr));
    }
    let textBuffer = '';
    // Write out any buffered text as one <w:t>, then clear the buffer.
    const emitBufferedText = () => {
        if (textBuffer === '') {
            return;
        }
        const body = escapeXmlText(textBuffer);
        // Any space (leading, trailing or interior) requests xml:space="preserve".
        const openTag = textBuffer.includes(' ') ? '<w:t xml:space="preserve">' : '<w:t>';
        chunks.push(`${openTag}${body}</w:t>`);
        textBuffer = '';
    };
    for (const atom of visibleAtoms) {
        debugAtomCounter++;
        const fieldAtoms = atom.collapsedFieldAtoms;
        if (fieldAtoms && fieldAtoms.length > 0) {
            emitBufferedText();
            for (const fieldAtom of fieldAtoms) {
                chunks.push(serializeAtomElement(fieldAtom.contentElement));
            }
        }
        else if (atom.contentElement.tagName === 'w:t') {
            textBuffer += getLeafText(atom.contentElement) ?? '';
        }
        else {
            emitBufferedText();
            chunks.push(serializeAtomElement(atom.contentElement));
        }
    }
    emitBufferedText();
    chunks.push('</w:r>');
    return chunks.join('');
}
716
/**
 * Serialize an atom's content element to an XML string.
 *
 * - w:t: text is escaped; xml:space="preserve" is added when it contains a space.
 *   Increments debugWtCounter.
 * - w:br: emitted as a bare <w:br/> only when the element has no attributes;
 *   otherwise it is serialized as-is so that w:type / w:clear survive (a bare
 *   <w:br/> would turn a page or column break into a plain line break).
 * - w:tab / w:cr: emitted in their bare form.
 * - Anything else (including field chars, instrText): serialized as-is.
 *
 * @param {object} element - Content element; `tagName` and, for w:br, `attributes` are read.
 * @returns {string} Serialized element.
 */
function serializeAtomElement(element) {
    switch (element.tagName) {
        case 'w:t': {
            debugWtCounter++;
            const text = escapeXmlText(getLeafText(element) ?? '');
            return text.includes(' ')
                ? `<w:t xml:space="preserve">${text}</w:t>`
                : `<w:t>${text}</w:t>`;
        }
        case 'w:br': {
            // Supports both list-like attribute collections (with `length`) and
            // plain attribute records; anything else counts as "no attributes".
            const attrs = element.attributes;
            let attrCount = 0;
            if (attrs !== null && typeof attrs === 'object') {
                attrCount = typeof attrs.length === 'number' ? attrs.length : Object.keys(attrs).length;
            }
            return attrCount > 0 ? serializeToXml(element) : '<w:br/>';
        }
        case 'w:tab':
            return '<w:tab/>';
        case 'w:cr':
            return '<w:cr/>';
        default:
            return serializeToXml(element);
    }
}
745
/**
 * Render the <w:r> element(s) for a run group.
 *
 * Returns '' when the group holds only empty-paragraph markers, so no empty
 * <w:r> is ever produced. A non-null group.rPr yields one run with that rPr.
 * A null group.rPr (e.g. after reorderChangeBlocks merges atoms from several
 * original run groups) yields one run per contiguous per-atom rPr, which keeps
 * formatting from bleeding across atoms.
 *
 * @param {object} group - Run group; `group.atoms` and `group.rPr` are read.
 * @returns {string} Concatenated run XML, possibly empty.
 */
function buildRunContent(group) {
    const hasContent = (atom) => atom.contentElement.tagName !== EMPTY_PARAGRAPH_TAG;
    const realAtoms = group.atoms.filter(hasContent);
    if (realAtoms.length === 0) {
        return '';
    }
    if (group.rPr === null) {
        // No group-level rPr: fall back to per-atom run properties.
        const runs = [];
        for (const subGroup of subGroupByRPr(realAtoms)) {
            runs.push(buildSingleRun(subGroup.atoms, subGroup.rPr));
        }
        return runs.join('');
    }
    // Explicit group rPr: a single run (buildSingleRun drops the markers itself).
    return buildSingleRun(group.atoms, group.rPr);
}
771
/**
 * Enclose run XML in a <w:ins> revision element.
 *
 * @param {string} content - Run XML to wrap.
 * @param {string} author - Revision author (XML-escaped here).
 * @param {string} dateStr - Revision date, written verbatim.
 * @param {object} revState - State object handed to allocateRevisionId.
 * @returns {string} The wrapped XML.
 */
function wrapWithIns(content, author, dateStr, revState) {
    const revisionId = allocateRevisionId(revState);
    const openTag = `<w:ins w:id="${revisionId}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`;
    return openTag + content + '</w:ins>';
}
778
/**
 * Enclose run XML in a <w:del> revision element, rewriting every <w:t> to
 * <w:delText> (attributes such as xml:space are carried over).
 *
 * @param {string} content - Run XML to wrap.
 * @param {string} author - Revision author (XML-escaped here).
 * @param {string} dateStr - Revision date, written verbatim.
 * @param {object} revState - State object handed to allocateRevisionId.
 * @returns {string} The wrapped XML.
 */
function wrapWithDel(content, author, dateStr, revState) {
    const revisionId = allocateRevisionId(revState);
    const textElement = /<w:t([^>]*)>([^<]*)<\/w:t>/g;
    const deletedRuns = content.replace(textElement, '<w:delText$1>$2</w:delText>');
    const openTag = `<w:del w:id="${revisionId}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`;
    return openTag + deletedRuns + '</w:del>';
}
787
/**
 * Wrap content with w:moveFrom elements (range start, moveFrom, range end).
 *
 * The move name is XML-escaped before being written into w:name, like the
 * author, so a name containing `"`, `<`, `>` or `&` cannot break the attribute.
 *
 * @param {string} content - Run XML of the moved-from content.
 * @param {string} author - Revision author (XML-escaped here).
 * @param {string} dateStr - Revision date, written verbatim.
 * @param {string} moveName - Name linking this source range to its destination.
 * @param {object} revState - State object handed to getMoveRangeIds / allocateRevisionId.
 * @returns {string} The wrapped XML.
 */
function wrapWithMoveFrom(content, author, dateStr, moveName, revState) {
    const ids = getMoveRangeIds(revState, moveName);
    const moveId = allocateRevisionId(revState);
    const safeName = escapeXmlAttr(String(moveName));
    const safeAuthor = escapeXmlAttr(author);
    // Convert w:t to w:delText for moved-from content.
    // NOTE(review): Word-authored files appear to keep <w:t> inside <w:moveFrom>;
    // the existing conversion is retained unchanged — confirm against the spec.
    const delContent = content.replace(/<w:t([^>]*)>([^<]*)<\/w:t>/g, '<w:delText$1>$2</w:delText>');
    return (`<w:moveFromRangeStart w:id="${ids.sourceRangeId}" w:name="${safeName}" w:author="${safeAuthor}" w:date="${dateStr}"/>` +
        `<w:moveFrom w:id="${moveId}" w:author="${safeAuthor}" w:date="${dateStr}">${delContent}</w:moveFrom>` +
        `<w:moveFromRangeEnd w:id="${ids.sourceRangeId}"/>`);
}
799
/**
 * Wrap content with w:moveTo elements (range start, moveTo, range end).
 *
 * The move name is XML-escaped before being written into w:name, like the
 * author, so a name containing `"`, `<`, `>` or `&` cannot break the attribute.
 *
 * @param {string} content - Run XML of the moved-to content.
 * @param {string} author - Revision author (XML-escaped here).
 * @param {string} dateStr - Revision date, written verbatim.
 * @param {string} moveName - Name linking this destination range to its source.
 * @param {object} revState - State object handed to getMoveRangeIds / allocateRevisionId.
 * @returns {string} The wrapped XML.
 */
function wrapWithMoveTo(content, author, dateStr, moveName, revState) {
    const ids = getMoveRangeIds(revState, moveName);
    const moveId = allocateRevisionId(revState);
    const safeName = escapeXmlAttr(String(moveName));
    const safeAuthor = escapeXmlAttr(author);
    return (`<w:moveToRangeStart w:id="${ids.destRangeId}" w:name="${safeName}" w:author="${safeAuthor}" w:date="${dateStr}"/>` +
        `<w:moveTo w:id="${moveId}" w:author="${safeAuthor}" w:date="${dateStr}">${content}</w:moveTo>` +
        `<w:moveToRangeEnd w:id="${ids.destRangeId}"/>`);
}
809
/**
 * Build a <w:r> whose <w:rPr> records a tracked formatting change (w:rPrChange).
 *
 * Current properties come from group.rPr, falling back to the first atom's rPr;
 * any existing w:rPrChange child is dropped. The previous properties come from
 * the first atom's formatChange.oldRunProperties.
 *
 * Differences from the earlier implementation:
 * - Empty-paragraph marker atoms are skipped, as in the other run builders,
 *   instead of being serialized into the run.
 * - When oldRunProperties is itself a <w:rPr> element, its children are emitted
 *   inside a <w:rPr> wrapper, since <w:rPrChange> holds the previous properties
 *   in a child <w:rPr>. Any other node keeps the previous output shape.
 *
 * NOTE(review): atoms with collapsedFieldAtoms are not expanded here, unlike in
 * buildSingleRun — confirm whether format-changed fields need that expansion.
 *
 * @param {object} group - Run group with FormatChanged status.
 * @param {string} author - Revision author (XML-escaped here).
 * @param {string} dateStr - Revision date, written verbatim.
 * @param {object} revState - State object handed to allocateRevisionId.
 * @returns {string} Serialized run XML.
 */
function buildFormatChangeRun(group, author, dateStr, revState) {
    const parts = ['<w:r>'];
    const firstAtom = group.atoms[0];
    const effectiveRPr = group.rPr ?? firstAtom?.rPr ?? null;
    const formatChange = firstAtom?.formatChange;
    if (effectiveRPr || formatChange) {
        parts.push('<w:rPr>');
        // Current properties (minus any stale rPrChange).
        if (effectiveRPr) {
            for (const child of childElements(effectiveRPr)) {
                if (child.tagName !== 'w:rPrChange') {
                    parts.push(serializeToXml(child));
                }
            }
        }
        // rPrChange carrying the old properties.
        const oldRunProperties = formatChange?.oldRunProperties;
        if (oldRunProperties) {
            const id = allocateRevisionId(revState);
            const needsRPrWrapper = oldRunProperties.tagName === 'w:rPr';
            parts.push(`<w:rPrChange w:id="${id}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`);
            if (needsRPrWrapper) {
                parts.push('<w:rPr>');
            }
            for (const child of childElements(oldRunProperties)) {
                parts.push(serializeToXml(child));
            }
            if (needsRPrWrapper) {
                parts.push('</w:rPr>');
            }
            parts.push('</w:rPrChange>');
        }
        parts.push('</w:rPr>');
    }
    // Atom content.
    for (const atom of group.atoms) {
        const element = atom.contentElement;
        if (element.tagName === EMPTY_PARAGRAPH_TAG) {
            continue; // synthetic marker, not real run content
        }
        if (element.tagName === 'w:t') {
            const text = escapeXmlText(getLeafText(element) ?? '');
            parts.push(text.includes(' ')
                ? `<w:t xml:space="preserve">${text}</w:t>`
                : `<w:t>${text}</w:t>`);
        }
        else {
            parts.push(serializeToXml(element));
        }
    }
    parts.push('</w:r>');
    return parts.join('');
}
858
/**
 * Produce the final document XML by swapping the contents of <w:body> for the
 * reconstructed paragraphs, leaving everything outside the body untouched.
 *
 * No sectPr is extracted and re-appended here:
 * 1. Section properties inside pPr elements are already part of the
 *    reconstructed paragraphs.
 * 2. An earlier "final sectPr" regex matched sectPr inside pPr elements and
 *    captured large amounts of body content, causing duplicate text.
 *
 * @param {string} originalXml - Full document XML containing a <w:body> element.
 * @param {string[]} paragraphXmls - Serialized paragraphs, joined with newlines.
 * @returns {string} Document XML with the new body content.
 * @throws {Error} When no <w:body>...</w:body> pair is found.
 */
function buildDocument(originalXml, paragraphXmls) {
    const bodyMatch = originalXml.match(/(<w:body[^>]*>)([\s\S]*?)(<\/w:body>)/);
    if (bodyMatch === null) {
        throw new Error('Could not find w:body in document');
    }
    const [wholeBody, openTag, , closeTag] = bodyMatch;
    const bodyStart = bodyMatch.index;
    const prefix = originalXml.slice(0, bodyStart);
    const suffix = originalXml.slice(bodyStart + wholeBody.length);
    // New body: the paragraphs only (section properties travel in their pPr).
    const bodyContent = paragraphXmls.join('\n');
    return `${prefix}${openTag}\n${bodyContent}\n${closeTag}${suffix}`;
}
880
/**
 * Escape a string for use as XML text content (`&`, `<` and `>` only).
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with the three characters replaced by entities.
 */
function escapeXmlText(text) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
    return text.replace(/[&<>]/g, (ch) => entities[ch]);
}
889
/**
 * Escape a string for use inside a double-quoted XML attribute value
 * (`&`, `<`, `>` and `"`).
 *
 * @param {string} text - Raw attribute value.
 * @returns {string} Value with the four characters replaced by entities.
 */
function escapeXmlAttr(text) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
    return text.replace(/[&<>"]/g, (ch) => entities[ch]);
}
899
/**
 * Tally reconstruction statistics over the merged atoms.
 *
 * Insertions, deletions and format changes are counted per atom. Move atoms are
 * counted on both the source and destination side, so the total is halved
 * (rounded down). Paragraphs are the distinct w:p ancestors found via
 * findAncestorByTag.
 *
 * @param {Iterable<object>} mergedAtoms - Atoms carrying a `correlationStatus`.
 * @returns {{paragraphs: number, insertions: number, deletions: number,
 *   moves: number, formatChanges: number}} The tallies.
 */
export function computeReconstructionStats(mergedAtoms) {
    const seenParagraphs = new Set();
    let insertedAtoms = 0;
    let deletedAtoms = 0;
    let moveAtoms = 0;
    let formatChangedAtoms = 0;
    for (const atom of mergedAtoms) {
        const paragraph = findAncestorByTag(atom, 'w:p');
        if (paragraph) {
            seenParagraphs.add(paragraph);
        }
        const status = atom.correlationStatus;
        if (status === CorrelationStatus.Inserted) {
            insertedAtoms += 1;
        }
        else if (status === CorrelationStatus.Deleted) {
            deletedAtoms += 1;
        }
        else if (status === CorrelationStatus.MovedSource || status === CorrelationStatus.MovedDestination) {
            moveAtoms += 1;
        }
        else if (status === CorrelationStatus.FormatChanged) {
            formatChangedAtoms += 1;
        }
    }
    return {
        paragraphs: seenParagraphs.size,
        insertions: insertedAtoms,
        deletions: deletedAtoms,
        // Source and destination atoms are both counted above.
        moves: Math.floor(moveAtoms / 2),
        formatChanges: formatChangedAtoms,
    };
}
939
+ //# sourceMappingURL=documentReconstructor.js.map