@usejunior/docx-core 0.2.0 → 0.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +0 -2
  2. package/dist/.tsbuildinfo +1 -1
  3. package/dist/atomizer.d.ts.map +1 -1
  4. package/dist/atomizer.js +16 -3
  5. package/dist/atomizer.js.map +1 -1
  6. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
  7. package/dist/baselines/atomizer/documentReconstructor.js +11 -5
  8. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
  9. package/dist/baselines/atomizer/inPlaceModifier.d.ts +81 -1
  10. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
  11. package/dist/baselines/atomizer/inPlaceModifier.js +618 -34
  12. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
  13. package/dist/baselines/atomizer/pipeline.d.ts +1 -1
  14. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
  15. package/dist/baselines/atomizer/pipeline.js +70 -1
  16. package/dist/baselines/atomizer/pipeline.js.map +1 -1
  17. package/dist/baselines/atomizer/premergeRuns.d.ts.map +1 -1
  18. package/dist/baselines/atomizer/premergeRuns.js +3 -0
  19. package/dist/baselines/atomizer/premergeRuns.js.map +1 -1
  20. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
  21. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +63 -33
  22. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
  23. package/dist/cli/compare-two.js +4 -4
  24. package/dist/cli/compare-two.js.map +1 -1
  25. package/dist/index.d.ts +6 -6
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +7 -7
  28. package/dist/index.js.map +1 -1
  29. package/dist/primitives/document.d.ts +12 -0
  30. package/dist/primitives/document.d.ts.map +1 -1
  31. package/dist/primitives/document.js +17 -2
  32. package/dist/primitives/document.js.map +1 -1
  33. package/dist/primitives/document_view.d.ts +2 -0
  34. package/dist/primitives/document_view.d.ts.map +1 -1
  35. package/dist/primitives/document_view.js +3 -2
  36. package/dist/primitives/document_view.js.map +1 -1
  37. package/dist/primitives/formatting_tags.d.ts +14 -2
  38. package/dist/primitives/formatting_tags.d.ts.map +1 -1
  39. package/dist/primitives/formatting_tags.js +13 -11
  40. package/dist/primitives/formatting_tags.js.map +1 -1
  41. package/dist/primitives/matching.d.ts +12 -0
  42. package/dist/primitives/matching.d.ts.map +1 -1
  43. package/dist/primitives/matching.js +54 -0
  44. package/dist/primitives/matching.js.map +1 -1
  45. package/dist/primitives/merge_runs.d.ts.map +1 -1
  46. package/dist/primitives/merge_runs.js +48 -0
  47. package/dist/primitives/merge_runs.js.map +1 -1
  48. package/dist/shared/validators/structural.d.ts +31 -0
  49. package/dist/shared/validators/structural.d.ts.map +1 -0
  50. package/dist/shared/validators/structural.js +110 -0
  51. package/dist/shared/validators/structural.js.map +1 -0
  52. package/package.json +2 -2
  53. package/dist/baselines/diffmatch/documentBuilder.d.ts +0 -44
  54. package/dist/baselines/diffmatch/documentBuilder.d.ts.map +0 -1
  55. package/dist/baselines/diffmatch/documentBuilder.js +0 -227
  56. package/dist/baselines/diffmatch/documentBuilder.js.map +0 -1
  57. package/dist/baselines/diffmatch/paragraphAlignment.d.ts +0 -75
  58. package/dist/baselines/diffmatch/paragraphAlignment.d.ts.map +0 -1
  59. package/dist/baselines/diffmatch/paragraphAlignment.js +0 -206
  60. package/dist/baselines/diffmatch/paragraphAlignment.js.map +0 -1
  61. package/dist/baselines/diffmatch/pipeline.d.ts +0 -33
  62. package/dist/baselines/diffmatch/pipeline.d.ts.map +0 -1
  63. package/dist/baselines/diffmatch/pipeline.js +0 -84
  64. package/dist/baselines/diffmatch/pipeline.js.map +0 -1
  65. package/dist/baselines/diffmatch/runDiff.d.ts +0 -53
  66. package/dist/baselines/diffmatch/runDiff.d.ts.map +0 -1
  67. package/dist/baselines/diffmatch/runDiff.js +0 -253
  68. package/dist/baselines/diffmatch/runDiff.js.map +0 -1
  69. package/dist/baselines/diffmatch/trackChangesRenderer.d.ts +0 -64
  70. package/dist/baselines/diffmatch/trackChangesRenderer.d.ts.map +0 -1
  71. package/dist/baselines/diffmatch/trackChangesRenderer.js +0 -178
  72. package/dist/baselines/diffmatch/trackChangesRenderer.js.map +0 -1
  73. package/dist/baselines/diffmatch/xmlParser.d.ts +0 -45
  74. package/dist/baselines/diffmatch/xmlParser.d.ts.map +0 -1
  75. package/dist/baselines/diffmatch/xmlParser.js +0 -216
  76. package/dist/baselines/diffmatch/xmlParser.js.map +0 -1
  77. package/dist/benchmark/metrics.d.ts +0 -72
  78. package/dist/benchmark/metrics.d.ts.map +0 -1
  79. package/dist/benchmark/metrics.js +0 -45
  80. package/dist/benchmark/metrics.js.map +0 -1
  81. package/dist/benchmark/reporter.d.ts +0 -23
  82. package/dist/benchmark/reporter.d.ts.map +0 -1
  83. package/dist/benchmark/reporter.js +0 -147
  84. package/dist/benchmark/reporter.js.map +0 -1
  85. package/dist/benchmark/runner.d.ts +0 -30
  86. package/dist/benchmark/runner.d.ts.map +0 -1
  87. package/dist/benchmark/runner.js +0 -233
  88. package/dist/benchmark/runner.js.map +0 -1
@@ -12,7 +12,8 @@
12
12
  */
13
13
  import { CorrelationStatus } from '../../core-types.js';
14
14
  import { EMPTY_PARAGRAPH_TAG } from '../../atomizer.js';
15
- import { getLeafText, childElements, findChildByTagName, insertAfterElement, wrapElement, splitRunAtVisibleOffset, visibleLengthForEl, getDirectContentElements, } from '../../primitives/index.js';
15
+ import { getLeafText, childElements, findChildByTagName, insertAfterElement, wrapElement, unwrapElement, splitRunAtVisibleOffset, visibleLengthForEl, getDirectContentElements, } from '../../primitives/index.js';
16
+ import { areRunPropertiesEqual } from '../../format-detection.js';
16
17
  import { enforceConsumerCompatibility } from './consumerCompatibility.js';
17
18
  import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
18
19
  import { warn } from './debug.js';
@@ -126,8 +127,9 @@ function formatDate(date) {
126
127
  * @param element - The element to process
127
128
  */
128
129
  function convertToDelText(element) {
129
- if (element.tagName === 'w:t') {
130
- const newEl = createEl('w:delText');
130
+ if (element.tagName === 'w:t' || element.tagName === 'w:instrText') {
131
+ const newTag = element.tagName === 'w:t' ? 'w:delText' : 'w:delInstrText';
132
+ const newEl = createEl(newTag);
131
133
  // Copy text content
132
134
  while (element.firstChild)
133
135
  newEl.appendChild(element.firstChild);
@@ -137,10 +139,6 @@ function convertToDelText(element) {
137
139
  newEl.setAttribute(attr.name, attr.value);
138
140
  }
139
141
  element.parentNode?.replaceChild(newEl, element);
140
- // Recurse into children of the new element (none expected for w:t, but be safe)
141
- for (const child of childElements(newEl)) {
142
- convertToDelText(child);
143
- }
144
142
  return;
145
143
  }
146
144
  for (const child of childElements(element)) {
@@ -153,9 +151,22 @@ function convertToDelText(element) {
153
151
  * Collapsed field atoms use a synthetic w:t as their top-level contentElement,
154
152
  * but retain the original field sequence in collapsedFieldAtoms. For insertion,
155
153
  * we must replay the original sequence rather than the synthetic text.
154
+ *
155
+ * @param filterRun - When provided, only return content elements whose
156
+ * collapsed field atom belongs to this specific source run. Used by
157
+ * insertDeletedRun/insertMoveFromRun to emit one cloned run per original
158
+ * source run, preserving multi-run field structure.
156
159
  */
157
- function getInsertableAtomContentElements(atom) {
160
+ function getInsertableAtomContentElements(atom, filterRun) {
158
161
  if (atom.collapsedFieldAtoms && atom.collapsedFieldAtoms.length > 0) {
162
+ if (filterRun) {
163
+ return atom.collapsedFieldAtoms
164
+ .filter((fieldAtom) => {
165
+ const run = fieldAtom.sourceRunElement ?? findAncestorByTag(fieldAtom, 'w:r');
166
+ return run === filterRun;
167
+ })
168
+ .map((fieldAtom) => fieldAtom.contentElement);
169
+ }
159
170
  return atom.collapsedFieldAtoms.map((fieldAtom) => fieldAtom.contentElement);
160
171
  }
161
172
  return [atom.contentElement];
@@ -164,8 +175,11 @@ function getInsertableAtomContentElements(atom) {
164
175
  * Clone a source run and replace its non-rPr children with atom content.
165
176
  *
166
177
  * This keeps run-level formatting while allowing atom-level fragment insertion.
178
+ *
179
+ * @param filterRun - When provided, only include content elements belonging
180
+ * to this source run (for multi-run collapsed field replay).
167
181
  */
168
- function cloneRunWithAtomContent(sourceRun, atom) {
182
+ function cloneRunWithAtomContent(sourceRun, atom, filterRun) {
169
183
  const clonedRun = sourceRun.cloneNode(true);
170
184
  const retainedChildren = [];
171
185
  for (const child of childElements(clonedRun)) {
@@ -180,7 +194,7 @@ function cloneRunWithAtomContent(sourceRun, atom) {
180
194
  for (const child of retainedChildren) {
181
195
  clonedRun.appendChild(child);
182
196
  }
183
- for (const contentElement of getInsertableAtomContentElements(atom)) {
197
+ for (const contentElement of getInsertableAtomContentElements(atom, filterRun)) {
184
198
  const fragment = contentElement.cloneNode(true);
185
199
  clonedRun.appendChild(fragment);
186
200
  }
@@ -599,12 +613,6 @@ export function insertDeletedRun(deletedAtom, insertAfterRun, targetParagraph, a
599
613
  if (!sourceRun) {
600
614
  return null;
601
615
  }
602
- // Clone only the atom fragment while preserving run-level formatting.
603
- // For collapsed fields, replay the original field sequence rather than
604
- // the synthetic collapsed w:t placeholder.
605
- const clonedRun = cloneRunWithAtomContent(sourceRun, deletedAtom);
606
- // Convert w:t to w:delText
607
- convertToDelText(clonedRun);
608
616
  // Create w:del wrapper
609
617
  const id = allocateRevisionId(state);
610
618
  const del = createEl('w:del', {
@@ -612,8 +620,22 @@ export function insertDeletedRun(deletedAtom, insertAfterRun, targetParagraph, a
612
620
  'w:author': author,
613
621
  'w:date': dateStr,
614
622
  });
615
- // Add cloned run as child of del
616
- del.appendChild(clonedRun);
623
+ // For collapsed field atoms, replay one cloned run per original source run
624
+ // to preserve multi-run field structure. Without this, all field elements
625
+ // get packed into a single run, breaking Word's field parsing.
626
+ const runs = getAtomRuns(deletedAtom);
627
+ if (runs.length > 1) {
628
+ for (const run of runs) {
629
+ const clonedRun = cloneRunWithAtomContent(run, deletedAtom, run);
630
+ convertToDelText(clonedRun);
631
+ del.appendChild(clonedRun);
632
+ }
633
+ }
634
+ else {
635
+ const clonedRun = cloneRunWithAtomContent(sourceRun, deletedAtom);
636
+ convertToDelText(clonedRun);
637
+ del.appendChild(clonedRun);
638
+ }
617
639
  // Insert at correct position
618
640
  if (insertAfterRun) {
619
641
  insertAfterElement(insertAfterRun, del);
@@ -655,12 +677,21 @@ export function insertMoveFromRun(atom, moveName, insertAfterRun, targetParagrap
655
677
  if (!sourceRun) {
656
678
  return null;
657
679
  }
658
- // Clone only the atom fragment while preserving run-level formatting.
659
- // For collapsed fields, replay the original field sequence rather than
660
- // the synthetic collapsed w:t placeholder.
661
- const clonedRun = cloneRunWithAtomContent(sourceRun, atom);
662
- // Convert w:t to w:delText (moved-from content appears as deleted)
663
- convertToDelText(clonedRun);
680
+ // For collapsed field atoms, replay one cloned run per original source run.
681
+ const runs = getAtomRuns(atom);
682
+ const clonedRuns = [];
683
+ if (runs.length > 1) {
684
+ for (const run of runs) {
685
+ const clonedRun = cloneRunWithAtomContent(run, atom, run);
686
+ convertToDelText(clonedRun);
687
+ clonedRuns.push(clonedRun);
688
+ }
689
+ }
690
+ else {
691
+ const clonedRun = cloneRunWithAtomContent(sourceRun, atom);
692
+ convertToDelText(clonedRun);
693
+ clonedRuns.push(clonedRun);
694
+ }
664
695
  // Get or allocate move range IDs
665
696
  const ids = getMoveRangeIds(state, moveName);
666
697
  const moveId = allocateRevisionId(state);
@@ -681,8 +712,10 @@ export function insertMoveFromRun(atom, moveName, insertAfterRun, targetParagrap
681
712
  const rangeEnd = createEl('w:moveFromRangeEnd', {
682
713
  'w:id': String(ids.sourceRangeId),
683
714
  });
684
- // Add cloned run as child of moveFrom
685
- moveFrom.appendChild(clonedRun);
715
+ // Add cloned run(s) as children of moveFrom
716
+ for (const clonedRun of clonedRuns) {
717
+ moveFrom.appendChild(clonedRun);
718
+ }
686
719
  // Insert at correct position: rangeStart -> moveFrom(run) -> rangeEnd
687
720
  if (insertAfterRun) {
688
721
  insertAfterElement(insertAfterRun, rangeStart);
@@ -910,6 +943,27 @@ export function addParagraphPropertyChange(paragraph, author, dateStr, state) {
910
943
  pPrChange.appendChild(oldPPr);
911
944
  pPr.appendChild(pPrChange); // pPrChange goes last in pPr per schema
912
945
  }
946
+ /**
947
+ * Tag names that represent visible content inside a w:r element.
948
+ * A run containing at least one of these is considered substantive (non-empty).
949
+ */
950
+ const RUN_VISIBLE_CONTENT_TAGS = new Set([
951
+ 'w:t', 'w:tab', 'w:br', 'w:cr', 'w:drawing', 'w:object', 'w:pict',
952
+ 'w:sym', 'w:fldChar', 'w:instrText',
953
+ ]);
954
+ /**
955
+ * Returns true if a w:r element contains at least one visible content child.
956
+ * Empty runs (containing only w:rPr or nothing) return false.
957
+ */
958
+ export function runHasVisibleContent(run) {
959
+ for (let i = 0; i < run.childNodes.length; i++) {
960
+ const child = run.childNodes[i];
961
+ if (child.nodeType === 1 && RUN_VISIBLE_CONTENT_TAGS.has(child.tagName)) {
962
+ return true;
963
+ }
964
+ }
965
+ return false;
966
+ }
913
967
  /**
914
968
  * Wrap an inserted empty paragraph with <w:ins>.
915
969
  *
@@ -920,13 +974,44 @@ export function addParagraphPropertyChange(paragraph, author, dateStr, state) {
920
974
  * @param dateStr - Formatted date
921
975
  * @param state - Revision ID state
922
976
  */
923
- export function wrapParagraphAsInserted(_paragraph, _author, _dateStr, _state) {
924
- // No-op: paragraph-mark w:ins markers and pPrChange inside pPr/rPr are valid
925
- // OOXML but Google Docs ignores (or actively hides) w:ins-wrapped runs when
926
- // they coexist with those markers. Since the individual runs in an inserted
927
- // paragraph are already wrapped with <w:ins> by the comparison engine, the
928
- // paragraph-level markers are redundant and omitting them maximises
929
- // cross-application compatibility.
977
+ export function wrapParagraphAsInserted(paragraph, author, dateStr, state) {
978
+ // For paragraphs with substantive run content: skip the paragraph-mark marker.
979
+ // Google Docs ignores (or actively hides) w:ins-wrapped runs when they
980
+ // coexist with PPR-INS markers. Since individual runs are already wrapped
981
+ // with <w:ins>, the paragraph-level marker is redundant for non-empty
982
+ // paragraphs and omitting it maximises cross-application compatibility.
983
+ //
984
+ // For empty paragraphs (no runs, or only empty w:r shells): we MUST add
985
+ // the PPR-INS marker so that Reject All removes the paragraph. Without it,
986
+ // the empty paragraph shell survives reject, causing round-trip safety failures.
987
+ //
988
+ // Important: empty <w:r> elements (no w:t, w:tab, w:br, etc.) should NOT
989
+ // count as substantive content. They are empty shells that don't produce
990
+ // visible output and should not prevent PPR-INS from being added.
991
+ let hasSubstantiveContent = false;
992
+ for (const child of childElements(paragraph)) {
993
+ if (child.tagName === 'w:ins') {
994
+ // Check if the w:ins wrapper contains runs with visible content
995
+ for (let i = 0; i < child.childNodes.length; i++) {
996
+ const insChild = child.childNodes[i];
997
+ if (insChild.nodeType === 1 && insChild.tagName === 'w:r' &&
998
+ runHasVisibleContent(insChild)) {
999
+ hasSubstantiveContent = true;
1000
+ break;
1001
+ }
1002
+ }
1003
+ if (hasSubstantiveContent)
1004
+ break;
1005
+ }
1006
+ else if (child.tagName === 'w:r' && runHasVisibleContent(child)) {
1007
+ hasSubstantiveContent = true;
1008
+ break;
1009
+ }
1010
+ }
1011
+ if (hasSubstantiveContent) {
1012
+ return true;
1013
+ }
1014
+ addParagraphMarkRevisionMarker(paragraph, 'w:ins', author, dateStr, state);
930
1015
  return true;
931
1016
  }
932
1017
  /**
@@ -1080,6 +1165,134 @@ export function preSplitMixedStatusRuns(mergedAtoms) {
1080
1165
  }
1081
1166
  }
1082
1167
  }
1168
+ /**
1169
+ * Pre-split revised-tree runs where word-split Equal atoms from the same run
1170
+ * are interleaved with Deleted/MovedSource atoms in the merged atom list.
1171
+ *
1172
+ * `preSplitMixedStatusRuns` handles the case where a single run contains atoms
1173
+ * with DIFFERENT statuses (e.g., some Equal and some Inserted). But it cannot
1174
+ * handle the case where ALL atoms from a run are Equal yet Deleted atoms (from
1175
+ * the original tree) are interspersed between them in the merged list.
1176
+ *
1177
+ * Without this split, `handleEqual` sees all Equal atoms pointing to the same
1178
+ * run and skips position advancement (the `lastRevisedRunAnchor` optimization).
1179
+ * Subsequent `handleDeleted` calls then insert deleted content at the wrong
1180
+ * position because the cursor never advanced past the shared run.
1181
+ *
1182
+ * This function detects interleaved sequences and splits the DOM run so each
1183
+ * contiguous group of Equal atoms gets its own run fragment. The handlers then
1184
+ * advance the cursor correctly across fragments.
1185
+ */
1186
+ export function preSplitInterleavedWordRuns(mergedAtoms) {
1187
+ const runToGroups = new Map();
1188
+ // Track cumulative offset per run (sums visible lengths of atoms seen so far)
1189
+ const runToOffset = new Map();
1190
+ let lastRevisedRun = null;
1191
+ for (const atom of mergedAtoms) {
1192
+ // Skip atoms from the original tree (Deleted/MovedSource have runs in the
1193
+ // original tree, not the revised tree).
1194
+ if (atom.correlationStatus === CorrelationStatus.Deleted ||
1195
+ atom.correlationStatus === CorrelationStatus.MovedSource) {
1196
+ // A Deleted/MovedSource atom between Equal atoms from the same run
1197
+ // creates an interleaving gap. Mark this by clearing lastRevisedRun
1198
+ // so the next Equal atom from the same run starts a new group.
1199
+ lastRevisedRun = null;
1200
+ continue;
1201
+ }
1202
+ const run = atom.sourceRunElement;
1203
+ if (!run)
1204
+ continue;
1205
+ // Skip collapsed field atoms — multi-run field sequences.
1206
+ if (atom.collapsedFieldAtoms && atom.collapsedFieldAtoms.length > 0)
1207
+ continue;
1208
+ // Skip field character elements — semantically fragile.
1209
+ if (FIELD_CHAR_TAG_NAMES.has(atom.contentElement.tagName))
1210
+ continue;
1211
+ const atomLen = atomContentVisibleLength(atom.contentElement);
1212
+ const currentOffset = runToOffset.get(run) ?? 0;
1213
+ runToOffset.set(run, currentOffset + atomLen);
1214
+ const groups = runToGroups.get(run);
1215
+ if (!groups) {
1216
+ // First time seeing this run — create initial group.
1217
+ runToGroups.set(run, [{
1218
+ startOffset: currentOffset,
1219
+ length: atomLen,
1220
+ atoms: [atom],
1221
+ }]);
1222
+ lastRevisedRun = run;
1223
+ continue;
1224
+ }
1225
+ if (lastRevisedRun === run) {
1226
+ // Contiguous with the previous atom from the same run — extend group.
1227
+ const lastGroup = groups[groups.length - 1];
1228
+ lastGroup.length += atomLen;
1229
+ lastGroup.atoms.push(atom);
1230
+ }
1231
+ else {
1232
+ // Gap detected (a Deleted/MovedSource atom intervened). Start new group.
1233
+ groups.push({
1234
+ startOffset: currentOffset,
1235
+ length: atomLen,
1236
+ atoms: [atom],
1237
+ });
1238
+ }
1239
+ lastRevisedRun = run;
1240
+ }
1241
+ // Now split runs that have more than one group.
1242
+ for (const [run, groups] of runToGroups) {
1243
+ if (groups.length <= 1)
1244
+ continue;
1245
+ // Guard: skip runs already detached from the tree.
1246
+ if (!run.parentNode)
1247
+ continue;
1248
+ try {
1249
+ // Compute actual visible length of the DOM run.
1250
+ const contentEls = getDirectContentElements(run);
1251
+ let runVisibleLength = 0;
1252
+ for (const cel of contentEls) {
1253
+ runVisibleLength += visibleLengthForEl(cel);
1254
+ }
1255
+ // Safety: if the sum of atom lengths exceeds run visible length,
1256
+ // something is off (cross-run atoms, etc.). Skip.
1257
+ let sumAtomLengths = 0;
1258
+ for (const group of groups) {
1259
+ sumAtomLengths += group.length;
1260
+ }
1261
+ if (sumAtomLengths > runVisibleLength)
1262
+ continue;
1263
+ // Collect split points: the startOffset of each group after the first.
1264
+ const splitPoints = [];
1265
+ for (let i = 1; i < groups.length; i++) {
1266
+ const pt = groups[i].startOffset;
1267
+ if (pt > 0 && pt < runVisibleLength) {
1268
+ splitPoints.push(pt);
1269
+ }
1270
+ }
1271
+ if (splitPoints.length === 0)
1272
+ continue;
1273
+ // Split DOM run right-to-left to keep earlier offsets valid.
1274
+ const rightFragments = [];
1275
+ for (let i = splitPoints.length - 1; i >= 0; i--) {
1276
+ const { right } = splitRunAtVisibleOffset(run, splitPoints[i]);
1277
+ rightFragments.push(right);
1278
+ }
1279
+ // Map fragments: [originalRun (leftmost), ...reverse(rightFragments)]
1280
+ const fragments = [run, ...rightFragments.reverse()];
1281
+ // Update atom sourceRunElement pointers to the correct fragment.
1282
+ for (let i = 0; i < groups.length; i++) {
1283
+ const fragment = fragments[i];
1284
+ if (!fragment)
1285
+ continue;
1286
+ for (const atom of groups[i].atoms) {
1287
+ atom.sourceRunElement = fragment;
1288
+ }
1289
+ }
1290
+ }
1291
+ catch (_err) {
1292
+ warn('preSplitInterleavedWordRuns', `Skipping run split due to error: ${_err}`);
1293
+ }
1294
+ }
1295
+ }
1083
1296
  /**
1084
1297
  * Modify the revised document's AST in-place based on comparison results.
1085
1298
  *
@@ -1097,6 +1310,7 @@ export function modifyRevisedDocument(revisedRoot, originalAtoms, revisedAtoms,
1097
1310
  attachSourceElementPointers(originalAtoms);
1098
1311
  attachSourceElementPointers(revisedAtoms);
1099
1312
  preSplitMixedStatusRuns(mergedAtoms);
1313
+ preSplitInterleavedWordRuns(mergedAtoms);
1100
1314
  // Process atoms and apply track changes to the revised tree
1101
1315
  // Group atoms by paragraph for efficient processing
1102
1316
  const ctx = processAtoms(mergedAtoms, originalAtoms, revisedAtoms, author, dateStr, state, revisedRoot);
@@ -1105,8 +1319,17 @@ export function modifyRevisedDocument(revisedRoot, originalAtoms, revisedAtoms,
1105
1319
  // - Reject All should remove inserted paragraphs entirely
1106
1320
  // - Accept All should remove deleted paragraphs entirely
1107
1321
  applyWholeParagraphRevisionMarkers(mergedAtoms, ctx);
1322
+ // Suppress field-adjacent no-op del/ins pairs (issue #42, Bug 1).
1323
+ // Must run BEFORE merge — after merge, pairwise comparison is impossible.
1324
+ suppressNoOpChangePairs(ctx.body);
1108
1325
  // Merge adjacent <w:ins>/<w:del> siblings to reduce revision fragmentation.
1109
1326
  mergeAdjacentTrackChangeSiblings(ctx.body);
1327
+ // Coalesce del/ins pair chains across whitespace (issue #42, Bug 2b).
1328
+ // Merges [del:A][ins:X][ws][del:B][ins:Y] → [del:A ws B][ins:X ws Y]
1329
+ coalesceDelInsPairChains(ctx.body);
1330
+ // Merge whitespace-bridged track change siblings (issue #42, Bug 2).
1331
+ // Runs AFTER coalesce — handles ins+ws+ins and moveTo+ws+moveTo bridging.
1332
+ mergeWhitespaceBridgedTrackChanges(ctx.body);
1110
1333
  // Apply strict post-render consumer compatibility pass
1111
1334
  enforceConsumerCompatibility(revisedRoot, () => allocateRevisionId(state));
1112
1335
  // Serialize the modified tree
@@ -1493,6 +1716,54 @@ const ATOM_HANDLERS = {
1493
1716
  [CorrelationStatus.Equal]: handleEqual,
1494
1717
  [CorrelationStatus.Unknown]: handleEqual,
1495
1718
  };
1719
+ const DELETION_LIKE_STATUSES = new Set([
1720
+ CorrelationStatus.Deleted,
1721
+ CorrelationStatus.MovedSource,
1722
+ ]);
1723
+ const INSERTION_LIKE_STATUSES = new Set([
1724
+ CorrelationStatus.Inserted,
1725
+ CorrelationStatus.MovedDestination,
1726
+ ]);
1727
+ /**
1728
+ * Reorder merged atoms so that within each contiguous block of non-equal atoms,
1729
+ * all deletion-like atoms come before all insertion-like atoms.
1730
+ *
1731
+ * This produces grouped tracked changes ("<del>old words</del><ins>new words</ins>")
1732
+ * instead of alternating word-by-word pairs ("<del>old1</del><ins>new1</ins><del>old2</del>...").
1733
+ */
1734
+ export function groupDeletionsBeforeInsertions(atoms) {
1735
+ const result = [];
1736
+ let i = 0;
1737
+ while (i < atoms.length) {
1738
+ const atom = atoms[i];
1739
+ const status = atom.correlationStatus;
1740
+ // Pass through equal/format-changed/unknown atoms unchanged
1741
+ if (!DELETION_LIKE_STATUSES.has(status) && !INSERTION_LIKE_STATUSES.has(status)) {
1742
+ result.push(atom);
1743
+ i++;
1744
+ continue;
1745
+ }
1746
+ // Collect a contiguous block of change atoms (deletions + insertions)
1747
+ const deletions = [];
1748
+ const insertions = [];
1749
+ while (i < atoms.length) {
1750
+ const s = atoms[i].correlationStatus;
1751
+ if (DELETION_LIKE_STATUSES.has(s)) {
1752
+ deletions.push(atoms[i]);
1753
+ }
1754
+ else if (INSERTION_LIKE_STATUSES.has(s)) {
1755
+ insertions.push(atoms[i]);
1756
+ }
1757
+ else {
1758
+ break;
1759
+ }
1760
+ i++;
1761
+ }
1762
+ // Emit all deletions first, then all insertions
1763
+ result.push(...deletions, ...insertions);
1764
+ }
1765
+ return result;
1766
+ }
1496
1767
  /**
1497
1768
  * Process atoms and apply track changes to the revised AST.
1498
1769
  *
@@ -1569,7 +1840,11 @@ function processAtoms(mergedAtoms, _originalAtoms, revisedAtoms, author, dateStr
1569
1840
  createdParagraphLastRun: new Map(),
1570
1841
  createdParagraphTrailingBookmarks: new Map(),
1571
1842
  };
1572
- for (const atom of mergedAtoms) {
1843
+ // Reorder atoms so consecutive deletions precede consecutive insertions.
1844
+ // This produces grouped tracked changes (all <w:del> then all <w:ins>)
1845
+ // instead of alternating word-by-word del/ins pairs.
1846
+ const reorderedAtoms = groupDeletionsBeforeInsertions(mergedAtoms);
1847
+ for (const atom of reorderedAtoms) {
1573
1848
  const handler = ATOM_HANDLERS[atom.correlationStatus];
1574
1849
  const result = handler(atom, ctx);
1575
1850
  // Update position tracking based on handler result
@@ -1690,6 +1965,315 @@ function mergeAdjacentTrackChangeSiblings(root) {
1690
1965
  }
1691
1966
  traverse(root);
1692
1967
  }
1968
+ // =============================================================================
1969
+ // Bug 1: Suppress field-adjacent false no-op del/ins pairs (issue #42)
1970
+ // =============================================================================
1971
/**
 * Build a normalized content signature for a run's non-rPr child elements.
 *
 * On the del side, w:delText is reported as w:t and w:delInstrText as
 * w:instrText, so that a run taken from a w:del wrapper can be compared
 * against a run taken from a w:ins wrapper.
 *
 * Each child element contributes its (normalized) tag name, its attributes
 * sorted by serialized form, and its text content. Attributes are included so
 * that elements which differ only by attribute (for example w:fldChar with
 * different w:fldCharType values) do not produce the same signature.
 * xml:space and namespace declarations are left out of the signature.
 *
 * @param {Element} run - The w:r element whose children are summarized.
 * @param {boolean} isDelSide - True when the run comes from a w:del wrapper.
 * @returns {string} The concatenated per-child signatures ('' when the run has
 *   no element children other than w:rPr).
 */
function normalizeRunContentSignature(run, isDelSide) {
    const parts = [];
    for (let i = 0; i < run.childNodes.length; i++) {
        const child = run.childNodes[i];
        // Only element nodes contribute; text/comment nodes directly under the run are skipped.
        if (child.nodeType !== 1)
            continue;
        const el = child;
        // Formatting is compared separately by the caller.
        if (el.tagName === 'w:rPr')
            continue;
        let tag = el.tagName;
        if (isDelSide) {
            if (tag === 'w:delText')
                tag = 'w:t';
            else if (tag === 'w:delInstrText')
                tag = 'w:instrText';
        }
        const attrParts = [];
        const attributes = el.attributes;
        const attrCount = attributes ? attributes.length : 0;
        for (let j = 0; j < attrCount; j++) {
            const attr = attributes[j];
            if (attr.name === 'xml:space' || attr.name === 'xmlns' || attr.name.startsWith('xmlns:'))
                continue;
            attrParts.push(` ${attr.name}="${attr.value}"`);
        }
        // Sort so that attribute order in the source XML does not affect equality.
        attrParts.sort();
        const text = el.textContent ?? '';
        parts.push(`<${tag}${attrParts.join('')}>${text}</${tag}>`);
    }
    return parts.join('');
}
1997
/**
 * Decide whether a w:del wrapper and a w:ins wrapper cancel each other out.
 *
 * The pair is a no-op when both wrappers hold the same non-zero number of
 * direct w:r children and, position by position, the runs have equal run
 * properties (per areRunPropertiesEqual) and equal normalized content
 * signatures.
 *
 * @param {Element} del - The w:del wrapper element.
 * @param {Element} ins - The w:ins wrapper element.
 * @returns {boolean} True when every run pair matches; false otherwise,
 *   including when either wrapper has no runs or the run counts differ.
 */
export function isNoOpPair(del, ins) {
    const isRun = (element) => element.tagName === 'w:r';
    const deletedRuns = childElements(del).filter(isRun);
    const insertedRuns = childElements(ins).filter(isRun);
    // An empty wrapper, or wrappers with different run counts, never match.
    if (deletedRuns.length === 0 || deletedRuns.length !== insertedRuns.length) {
        return false;
    }
    return deletedRuns.every((deletedRun, index) => {
        const insertedRun = insertedRuns[index];
        // Formatting first: a missing w:rPr is passed to the comparison as null.
        const deletedProps = findChildByTagName(deletedRun, 'w:rPr');
        const insertedProps = findChildByTagName(insertedRun, 'w:rPr');
        const sameFormatting = areRunPropertiesEqual(deletedProps ?? null, insertedProps ?? null);
        if (!sameFormatting) {
            return false;
        }
        // Then content, with del-side tag names mapped to their ins-side equivalents.
        return normalizeRunContentSignature(deletedRun, true) === normalizeRunContentSignature(insertedRun, false);
    });
}
2024
/**
 * Suppress no-op del/ins pairs: adjacent w:del + w:ins wrappers whose content
 * and formatting are identical according to isNoOpPair. These arise from
 * field-adjacent atoms that are false-positive changes.
 *
 * When a no-op pair is found, the w:del wrapper is removed together with its
 * content and the w:ins wrapper is unwrapped, leaving the ins-side runs as
 * plain (non-tracked) children of the parent. The tree under `root` is
 * modified in place.
 *
 * @param {Node} root - Root of the subtree to process.
 * @returns {void}
 */
export function suppressNoOpChangePairs(root) {
    function traverse(node) {
        let children = childElements(node);
        let i = 0;
        while (i < children.length - 1) {
            const a = children[i];
            const b = children[i + 1];
            if (a.tagName === 'w:del' && b.tagName === 'w:ins' && isNoOpPair(a, b)) {
                // Remove the del wrapper and its content entirely
                node.removeChild(a);
                // Unwrap the ins wrapper — promote its children to the parent
                unwrapElement(b);
                // Replace the whole snapshot after the mutation. Splicing the
                // fresh tail over just the two removed entries would leave the
                // stale tail in the array as well, duplicating every later
                // sibling and producing adjacencies that do not exist in the DOM.
                children = childElements(node);
                // Don't increment — recheck from the same position
                continue;
            }
            i++;
        }
        // Recurse into current children (re-query after mutations)
        for (const child of childElements(node)) {
            traverse(child);
        }
    }
    traverse(root);
}
2057
+ // =============================================================================
2058
+ // Bug 2: Merge whitespace-bridged track change siblings (issue #42)
2059
+ // =============================================================================
2060
/**
 * Narrow whitespace test used when bridging track-change wrappers.
 *
 * A node qualifies only if it is a w:r element that contains at least one w:t
 * child, every w:t child holds non-empty whitespace-only text, and there are
 * no other element children apart from w:rPr. Elements such as w:tab, w:br and
 * w:cr therefore disqualify the run. Non-element child nodes are ignored.
 *
 * @param {Element} run - Candidate element.
 * @returns {boolean} True when the run is whitespace-only in the above sense.
 */
function isInlineWhitespaceOnlyRun(run) {
    if (run.tagName !== 'w:r') {
        return false;
    }
    const nodes = run.childNodes;
    let sawWhitespaceText = false;
    for (let idx = 0; idx < nodes.length; idx++) {
        const node = nodes[idx];
        // Skip non-element nodes and the run-properties element.
        const isElement = node.nodeType === 1;
        if (!isElement || node.tagName === 'w:rPr') {
            continue;
        }
        // Anything other than w:t (w:tab, w:br, w:cr, w:fldChar, ...) disqualifies the run.
        if (node.tagName !== 'w:t') {
            return false;
        }
        // The text must be non-empty and consist solely of whitespace.
        const content = node.textContent ?? '';
        if (!/^\s+$/.test(content)) {
            return false;
        }
        sawWhitespaceText = true;
    }
    return sawWhitespaceText;
}
2089
/**
 * Merge sibling track-change wrappers that are separated by exactly one
 * whitespace-only run, so that "word-by-word" tracked changes become one
 * contiguous block.
 *
 * Only w:ins and w:moveTo wrappers are bridged, and only when both wrappers
 * have the same tag, the same w:author and the same w:date. The whitespace run
 * is moved into the first wrapper, the second wrapper's children are appended
 * after it, and the emptied second wrapper is removed from the parent.
 *
 * w:del wrappers are deliberately NOT bridged here: the intervening whitespace
 * is Equal content needed by the accept projection.
 *
 * NOTE(review): after bridging, the whitespace run lives inside the insertion
 * wrapper — confirm that this is the intended result for the reject projection.
 *
 * The tree under `root` is modified in place.
 *
 * @param {Node} root - Root of the subtree to process.
 * @returns {void}
 */
export function mergeWhitespaceBridgedTrackChanges(root) {
    // Built once per call rather than once per loop iteration.
    const bridgeableTags = new Set(['w:ins', 'w:moveTo']);
    function traverse(node) {
        const children = childElements(node);
        for (let i = 0; i < children.length - 2;) {
            const a = children[i];
            const mid = children[i + 1];
            const b = children[i + 2];
            // Check: same bridgeable track-change tag, same author/date, with a
            // whitespace-only run between the two wrappers.
            if (a.tagName === b.tagName &&
                bridgeableTags.has(a.tagName) &&
                a.getAttribute('w:author') === b.getAttribute('w:author') &&
                a.getAttribute('w:date') === b.getAttribute('w:date') &&
                isInlineWhitespaceOnlyRun(mid)) {
                // Move the whitespace run into the first wrapper, then absorb second's children
                a.appendChild(mid);
                while (b.firstChild)
                    a.appendChild(b.firstChild);
                node.removeChild(b);
                // mid was moved into a, b removed from parent — splice both from snapshot
                children.splice(i + 1, 2);
                // Don't increment — recheck a with new next sibling
                continue;
            }
            i++;
        }
        // Recurse into current children (re-query after mutations)
        for (const child of childElements(node)) {
            traverse(child);
        }
    }
    traverse(root);
}
2138
+ // =============================================================================
2139
+ // Bug 2b: Coalesce del/ins pair chains across whitespace (issue #42)
2140
+ // =============================================================================
2141
/**
 * Rewrite a run's text-bearing children into their deleted-content forms:
 * w:t becomes w:delText and w:instrText becomes w:delInstrText. The child
 * nodes and all attributes (including xml:space) of each converted element are
 * carried over to the replacement. The run is modified in place; other
 * children are left untouched.
 *
 * Used on cloned whitespace runs before they are placed inside a w:del wrapper
 * during pair-chain coalescing.
 *
 * @param {Element} run - The w:r element to convert.
 * @returns {void}
 */
function convertRunTextToDelText(run) {
    const deletedTagFor = new Map([
        ['w:t', 'w:delText'],
        ['w:instrText', 'w:delInstrText'],
    ]);
    for (let idx = 0; idx < run.childNodes.length; idx++) {
        const original = run.childNodes[idx];
        if (original.nodeType !== 1) {
            continue;
        }
        const deletedTag = deletedTagFor.get(original.tagName);
        if (deletedTag === undefined) {
            continue;
        }
        const replacement = createEl(deletedTag);
        // Move the content across (this empties the original element).
        while (original.firstChild) {
            replacement.appendChild(original.firstChild);
        }
        // Carry over every attribute, e.g. xml:space="preserve".
        for (let a = 0; a < original.attributes.length; a++) {
            const { name, value } = original.attributes[a];
            replacement.setAttribute(name, value);
        }
        // Swap in place so the child keeps its position within the run.
        run.replaceChild(replacement, original);
    }
}
2167
/**
 * Coalesce alternating del/ins pair chains separated by whitespace-only runs
 * into single grouped del + ins wrappers.
 *
 * Pattern: [w:del, w:ins, ws-segment..., w:del, w:ins, ws-segment..., w:del, w:ins]
 *
 * For each whitespace segment between consecutive [del, ins] pairs:
 * 1. Clone each ws-run → convert to delText → append to first del
 * 2. Clone each ws-run → keep as w:t → append to first ins
 * 3. Move nextDel's children into first del
 * 4. Move nextIns's children into first ins
 * 5. Remove original ws-runs, empty nextDel, empty nextIns from parent
 *
 * Safety invariants:
 * - Only bridges when both del AND ins absorb the whitespace (both projections correct)
 * - Incomplete tail [del, ins, ws, del] (no trailing ins) → stop chain, don't bridge
 * - All wrappers in chain must share same w:author and w:date
 *
 * The tree under `root` is modified in place.
 *
 * @param {Node} root - Root of the subtree to process.
 * @returns {void}
 */
export function coalesceDelInsPairChains(root) {
    function traverse(node) {
        const children = childElements(node);
        for (let i = 0; i < children.length - 1;) {
            const firstDel = children[i];
            const firstIns = children[i + 1];
            // Must start with a [del, ins] pair
            if (firstDel.tagName !== 'w:del' || firstIns.tagName !== 'w:ins') {
                i++;
                continue;
            }
            const author = firstDel.getAttribute('w:author');
            const date = firstDel.getAttribute('w:date');
            // Both wrappers of the leading pair must share author/date
            if (firstIns.getAttribute('w:author') !== author ||
                firstIns.getAttribute('w:date') !== date) {
                i++;
                continue;
            }
            // Try to extend the chain by absorbing subsequent [ws..., del, ins] groups
            let cursor = i + 2; // position after firstIns
            while (cursor < children.length) {
                // Collect whitespace segment (1..N consecutive whitespace-only runs)
                const wsStart = cursor;
                while (cursor < children.length && isInlineWhitespaceOnlyRun(children[cursor])) {
                    cursor++;
                }
                const wsEnd = cursor;
                const wsCount = wsEnd - wsStart;
                if (wsCount === 0)
                    break; // No whitespace → end of chain
                // Must have a complete [del, ins] pair after the whitespace
                if (cursor + 1 >= children.length)
                    break; // Not enough elements
                const nextDel = children[cursor];
                const nextIns = children[cursor + 1];
                if (nextDel.tagName !== 'w:del' || nextIns.tagName !== 'w:ins')
                    break;
                // Author/date must match
                if (nextDel.getAttribute('w:author') !== author ||
                    nextDel.getAttribute('w:date') !== date ||
                    nextIns.getAttribute('w:author') !== author ||
                    nextIns.getAttribute('w:date') !== date)
                    break;
                // All conditions met — absorb this [ws..., del, ins] into the first pair
                // 1. Clone whitespace runs into del (as delText) and ins (as w:t)
                for (let w = wsStart; w < wsEnd; w++) {
                    const wsRun = children[w];
                    const delClone = wsRun.cloneNode(true);
                    convertRunTextToDelText(delClone);
                    firstDel.appendChild(delClone);
                    const insClone = wsRun.cloneNode(true);
                    firstIns.appendChild(insClone);
                }
                // 2. Move nextDel's children into firstDel
                while (nextDel.firstChild)
                    firstDel.appendChild(nextDel.firstChild);
                // 3. Move nextIns's children into firstIns
                while (nextIns.firstChild)
                    firstIns.appendChild(nextIns.firstChild);
                // 4. Remove ws-runs, nextDel, nextIns from parent
                for (let w = wsStart; w < wsEnd; w++) {
                    node.removeChild(children[w]);
                }
                node.removeChild(nextDel);
                node.removeChild(nextIns);
                // 5. Splice the wsCount + 2 removed elements out of the snapshot
                children.splice(wsStart, wsCount + 2);
                // Reset cursor to continue checking right after firstIns
                cursor = wsStart;
            }
            // Advance past the [del, ins] pair whether or not it was extended:
            // firstIns cannot start a new pair, so both elements are skipped.
            i += 2;
        }
        // Recurse into current children (re-query after mutations)
        for (const child of childElements(node)) {
            traverse(child);
        }
    }
    traverse(root);
}
1693
2277
  // Re-export for convenience
1694
2278
  export { createRevisionIdState };
1695
2279
  //# sourceMappingURL=inPlaceModifier.js.map