npm - @createiq/htmldiff - Versions diffs - 1.2.0-beta.7 → 1.2.0-beta.9 - Mend

@createiq/htmldiff 1.2.0-beta.7 → 1.2.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/HtmlDiff.cjs +56 -11
package/dist/HtmlDiff.cjs.map +1 -1
package/dist/HtmlDiff.mjs +56 -11
package/dist/HtmlDiff.mjs.map +1 -1
package/package.json +1 -1
package/src/HtmlDiff.ts +25 -0
package/src/ThreeWayDiff.ts +53 -11
package/test/HtmlDiff.threeWay.spec.ts +65 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@createiq/htmldiff",
-  "version": "1.2.0-beta.7",
+  "version": "1.2.0-beta.9",
   "description": "TypeScript port of htmldiff.net",
   "type": "module",
   "author": "Mathew Mannion <mathew.mannion@linklaters.com>",

package/src/HtmlDiff.ts CHANGED Viewed

@@ -1119,6 +1119,31 @@ export default class HtmlDiff {
         continue
       }
+      // Never orphan-reject a match whose tokens are ALL HTML tags.
+      // Tag tokens are structural; rejecting `</strong>` / `</em>` as
+      // an orphan match between two content deletions merges the tag
+      // into the deletion, leaving the matching opener unclosed —
+      // browsers then auto-close the opener at the END of the
+      // deletion, producing visually-wrong output (e.g. the body of
+      // a section deletion rendered as bold-italic because the
+      // closing `</strong></em>` ended up after the body deletion
+      // rather than after the heading). The orphan threshold is
+      // designed for stray word matches between heavily-edited spans,
+      // not for formatting boundaries.
+      let allTags = true
+      for (let i = curr.startInNew; i < curr.endInNew; i++) {
+        if (!Utils.isTag(wordsForDiffNew[i])) {
+          allTags = false
+          break
+        }
+      }
+      if (allTags) {
+        yield curr
+        prev = curr
+        curr = next
+        continue
+      }
       let oldDistanceInChars = 0
       for (let i = prev.endInOld; i < next.startInOld; i++) {
         oldDistanceInChars += wordsForDiffOld[i].length

package/src/ThreeWayDiff.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import Action from './Action'
+import { lcsAlign } from './Alignment'
 import type { AnalyzeResult } from './HtmlDiff'
 import type Operation from './Operation'
 import type { WrapMetadata } from './Utils'
@@ -183,13 +184,31 @@ function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
 }
 /**
- * Emit any insertions at boundary `b`. When both authors inserted at
- * the same boundary AND the inserted token sequences are textually
- * identical, the insertion is treated as agreed and emitted unmarked.
- * Otherwise each side's insertion is emitted with author attribution.
+ * Emit any insertions at boundary `b`. Three cases:
  *
- * The CP-then-Me ordering for disagreement is arbitrary but consistent;
- * callers don't depend on it.
+ *   1. One side inserted, the other didn't → emit that side's tokens
+ *      with author attribution.
+ *   2. Both sides inserted the EXACT same sequence → settled, emit
+ *      unmarked.
+ *   3. Both sides inserted different sequences. These usually overlap
+ *      (the common case: one author accepted the other's insertion and
+ *      edited it, so e.g. cp's "X Y Z" overlaps me's "X Y a Z" with
+ *      "a" being a one-author-only addition), but every pair of
+ *      non-identical insertions takes this path. Run an LCS sub-diff
+ *      between the two insertion sequences and emit:
+ *        - tokens in BOTH → settled (equal segment)
+ *        - tokens only in cp → ins-cp
+ *        - tokens only in me → ins-me
+ *      The order of emission preserves the natural reading flow of
+ *      the merged insertion — common tokens read where they appear,
+ *      with author-only deltas inserted in their LCS-determined
+ *      positions.
+ *
+ * Without this sub-alignment, real-world flows like "Me added 'add
+ * more things here', CP accepted minus 'things'" would render as two
+ * full redundant insertions (`<ins cp>add more here</ins><ins me>add
+ * more things here</ins>`) rather than the obvious single shared
+ * insertion with a me-only "things" word — confusing to read and a
+ * regression vs Word's track-changes UX.
  */
 function emitBoundary(
   b: number,
@@ -205,14 +224,37 @@ function emitBoundary(
   const hasMe = !!meIns && meIns.length > 0
   if (!hasCp && !hasMe) return
-  if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
-    // Both authors inserted the same content — settled. Emit unmarked.
-    appendSegment(segments, { kind: 'equal' }, cpIns)
+  // Only-one-side: emit verbatim with attribution.
+  if (!hasCp) {
+    appendSegment(segments, { kind: 'ins', author: 'me' }, meIns!)
+    return
+  }
+  if (!hasMe) {
+    appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns!)
+    return
+  }
+  // Both sides inserted at this boundary. Identical sequences are
+  // settled; differing sequences get LCS-aligned and split into
+  // shared (settled) + author-only sub-segments.
+  if (tokenArraysEqual(cpIns!, meIns!)) {
+    appendSegment(segments, { kind: 'equal' }, cpIns!)
     return
   }
-  if (hasCp) appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns)
-  if (hasMe) appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
+  const alignment = lcsAlign(cpIns! as string[], meIns! as string[])
+  for (const a of alignment) {
+    if (a.oldIdx !== null && a.newIdx !== null) {
+      // Token appears in both insertions → settled.
+      appendSegment(segments, { kind: 'equal' }, [cpIns![a.oldIdx]])
+    } else if (a.oldIdx !== null) {
+      // Token in cp's insertion only.
+      appendSegment(segments, { kind: 'ins', author: 'cp' }, [cpIns![a.oldIdx]])
+    } else if (a.newIdx !== null) {
+      // Token in me's insertion only.
+      appendSegment(segments, { kind: 'ins', author: 'me' }, [meIns![a.newIdx]])
+    }
+  }
 }
 function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {

package/test/HtmlDiff.threeWay.spec.ts CHANGED Viewed

@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
     it('CP and Me each change the same word differently', () => {
       // Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
-      // Disagreement — show both authors' insertions.
+      // Disagreement — show both authors' insertions. The trailing
+      // space between the inserted word and "world" is shared by
+      // both insertion sequences, so the boundary-LCS recognises it
+      // as settled and emits the inserts as just the word, with the
+      // separating space outside the attribution wrappers.
       expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
-        "Hello <ins class='diffins cp' data-author='cp'>cruel </ins><ins class='diffins me' data-author='me'>brave </ins>world"
+        "Hello <ins class='diffins cp' data-author='cp'>cruel</ins><ins class='diffins me' data-author='me'>brave</ins> world"
       )
     })
@@ -71,6 +75,25 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
       )
     })
+    it("CP accepted Me's addition with a word removed — emits the common bulk as settled + one me-only word", () => {
+      // Real flow on the live preview:
+      //   - Me appends "And I add more things here" to a paragraph
+      //   - CP "accepts" Me's addition but deletes the word "things"
+      //     → CP's version of the appendix is "And I add more here"
+      // Genesis has neither addition. Both diffs (against genesis)
+      // are pure inserts with substantial overlap. Without the
+      // boundary LCS, the two insertions render as two full
+      // redundant spans:
+      //   <ins cp>And I add more here</ins><ins me>And I add more things here</ins>
+      // — visually confusing because the reader sees "And I add
+      // more" twice. The intent is clearly that CP refined Me's
+      // addition by removing one word, so the diff should surface
+      // the shared bulk as settled with a me-only "things".
+      expect(
+        HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
+      ).toBe("baseline. And I add more<ins class='diffins me' data-author='me'>&nbsp;things</ins> here")
+    })
     it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
       // The user's V3/V5 invariant: when neither party changes their position
       // in a subsequent turn, the diff should look identical to the previous
@@ -260,6 +283,46 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
     })
   })
+  describe('orphan-match guard for structural tags', () => {
+    // Real regression from the live preview (Additional Condition
+    // Precedent in the 2002 ISDA Schedule): when CP deletes a section
+    // whose answer renders as an empty formatting shell —
+    //   <p data-html="x"><em><strong></strong></em></p>
+    // — the `</strong>` and `</em>` matches sit between two content
+    // deletions ("Heading. " before, body after). At
+    // WORD_ALIGNED_OPTIONS.orphanMatchThreshold=0.25 those structural
+    // matches were rejected as orphans, swallowed into the deletion
+    // span, and the browser auto-closed the openers AT THE END of
+    // the deletion — visually rendering the entire deletion as
+    // bold-italic. The orphan filter now exempts tag-only matches
+    // so structural boundaries always survive.
+    it('CP deletes section with em+strong heading + plain body — closers stay between heading and body', () => {
+      const genesis =
+        '<p data-html="x"><em><strong>Additional Condition Precedent. </strong></em>For the purposes of Section 2(a)(iii).</p>'
+      const cp = '<p data-html="x"><em><strong></strong></em></p>'
+      const me = genesis
+      const out = HtmlDiff.executeThreeWay(genesis, cp, me, WORD_ALIGNED_OPTIONS)
+      // </strong> appears BEFORE the body deletion — meaning the
+      // body sits outside the bold-italic wrap, not inside it.
+      const closeStrongIdx = out.indexOf('</strong>')
+      const bodyDelIdx = out.indexOf('For the purposes')
+      expect(closeStrongIdx).toBeGreaterThan(0)
+      expect(bodyDelIdx).toBeGreaterThan(closeStrongIdx)
+      // No `<strong>…<del>body` substring exists — confirm by exact
+      // shape too. Heading wraps in strong+em, body is a plain del.
+      expect(out).toBe(
+        '<p data-html="x"><em><strong>' +
+          "<del class='diffdel cp' data-author='cp'>Additional Condition Precedent. </del>" +
+          '</strong></em>' +
+          "<del class='diffdel cp' data-author='cp'>For the purposes of Section 2(a)(iii).</del>" +
+          '</p>'
+      )
+    })
+  })
   describe('first-turn fallback', () => {
     it('cp == genesis means CP made no changes — Me-only attribution', () => {
       // Common case: this is the first turn where the counterparty hasn't