npm - @createiq/htmldiff - Versions diffs - 1.2.0-beta.8 → 1.2.0-beta.9 - Mend

@createiq/htmldiff 1.2.0-beta.8 → 1.2.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/HtmlDiff.cjs +45 -11
package/dist/HtmlDiff.cjs.map +1 -1
package/dist/HtmlDiff.mjs +45 -11
package/dist/HtmlDiff.mjs.map +1 -1
package/package.json +1 -1
package/src/ThreeWayDiff.ts +53 -11
package/test/HtmlDiff.threeWay.spec.ts +25 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@createiq/htmldiff",
-  "version": "1.2.0-beta.8",
+  "version": "1.2.0-beta.9",
   "description": "TypeScript port of htmldiff.net",
   "type": "module",
   "author": "Mathew Mannion <mathew.mannion@linklaters.com>",

package/src/ThreeWayDiff.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import Action from './Action'
+import { lcsAlign } from './Alignment'
 import type { AnalyzeResult } from './HtmlDiff'
 import type Operation from './Operation'
 import type { WrapMetadata } from './Utils'
@@ -183,13 +184,31 @@ function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
 }
 /**
- * Emit any insertions at boundary `b`. When both authors inserted at
- * the same boundary AND the inserted token sequences are textually
- * identical, the insertion is treated as agreed and emitted unmarked.
- * Otherwise each side's insertion is emitted with author attribution.
+ * Emit any insertions at boundary `b`. Three cases:
  *
- * The CP-then-Me ordering for disagreement is arbitrary but consistent;
- * callers don't depend on it.
+ *   1. One side inserted, the other didn't → emit that side's tokens
+ *      with author attribution.
+ *   2. Both sides inserted the EXACT same sequence → settled, emit
+ *      unmarked.
+ *   3. Both sides inserted overlapping but different sequences (the
+ *      common case: one author accepted the other's insertion and
+ *      edited it, so e.g. cp's "X Y Z" overlaps me's "X Y a Z" with
+ *      "a" being a one-author-only addition). Run an LCS sub-diff
+ *      between the two insertion sequences and emit:
+ *        - tokens in BOTH → settled (equal segment)
+ *        - tokens only in cp → ins-cp
+ *        - tokens only in me → ins-me
+ *      The order of emission preserves the natural reading flow of
+ *      the merged insertion — common tokens read where they appear,
+ *      with author-only deltas inserted in their LCS-determined
+ *      positions.
+ *
+ * Without this sub-alignment, real-world flows like "Me added 'add
+ * more things here', CP accepted minus 'things'" would render as two
+ * full redundant insertions (`<ins cp>add more here</ins><ins me>add
+ * more things here</ins>`) rather than the obvious single shared
+ * insertion with a me-only "things" word — confusing to read and a
+ * regression vs Word's track-changes UX.
  */
 function emitBoundary(
   b: number,
@@ -205,14 +224,37 @@ function emitBoundary(
   const hasMe = !!meIns && meIns.length > 0
   if (!hasCp && !hasMe) return
-  if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
-    // Both authors inserted the same content — settled. Emit unmarked.
-    appendSegment(segments, { kind: 'equal' }, cpIns)
+  // Only-one-side: emit verbatim with attribution.
+  if (!hasCp) {
+    appendSegment(segments, { kind: 'ins', author: 'me' }, meIns!)
+    return
+  }
+  if (!hasMe) {
+    appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns!)
+    return
+  }
+  // Both sides inserted at this boundary. Identical sequences are
+  // settled; differing sequences get LCS-aligned and split into
+  // shared (settled) + author-only sub-segments.
+  if (tokenArraysEqual(cpIns!, meIns!)) {
+    appendSegment(segments, { kind: 'equal' }, cpIns!)
     return
   }
-  if (hasCp) appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns)
-  if (hasMe) appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
+  const alignment = lcsAlign(cpIns! as string[], meIns! as string[])
+  for (const a of alignment) {
+    if (a.oldIdx !== null && a.newIdx !== null) {
+      // Token appears in both insertions → settled.
+      appendSegment(segments, { kind: 'equal' }, [cpIns![a.oldIdx]])
+    } else if (a.oldIdx !== null) {
+      // Token in cp's insertion only.
+      appendSegment(segments, { kind: 'ins', author: 'cp' }, [cpIns![a.oldIdx]])
+    } else if (a.newIdx !== null) {
+      // Token in me's insertion only.
+      appendSegment(segments, { kind: 'ins', author: 'me' }, [meIns![a.newIdx]])
+    }
+  }
 }
 function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {

package/test/HtmlDiff.threeWay.spec.ts CHANGED Viewed

@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
     it('CP and Me each change the same word differently', () => {
       // Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
-      // Disagreement — show both authors' insertions.
+      // Disagreement — show both authors' insertions. The trailing
+      // space between the inserted word and "world" is shared by
+      // both insertion sequences, so the boundary-LCS recognises it
+      // as settled and emits the inserts as just the word, with the
+      // separating space outside the attribution wrappers.
       expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
-        "Hello <ins class='diffins cp' data-author='cp'>cruel </ins><ins class='diffins me' data-author='me'>brave </ins>world"
+        "Hello <ins class='diffins cp' data-author='cp'>cruel</ins><ins class='diffins me' data-author='me'>brave</ins> world"
       )
     })
@@ -71,6 +75,25 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
       )
     })
+    it("CP accepted Me's addition with a word removed — emits the common bulk as settled + one me-only word", () => {
+      // Real flow on the live preview:
+      //   - Me appends "And I add more things here" to a paragraph
+      //   - CP "accepts" Me's addition but deletes the word "things"
+      //     → CP's version of the appendix is "And I add more here"
+      // Genesis has neither addition. Both diffs (against genesis)
+      // are pure inserts with substantial overlap. Without the
+      // boundary LCS, the two insertions render as two full
+      // redundant spans:
+      //   <ins cp>And I add more here</ins><ins me>And I add more things here</ins>
+      // — visually confusing because the reader sees "And I add
+      // more" twice. The intent is clearly that CP refined Me's
+      // addition by removing one word, so the diff should surface
+      // the shared bulk as settled with a me-only "things".
+      expect(
+        HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
+      ).toBe("baseline. And I add more<ins class='diffins me' data-author='me'>&nbsp;things</ins> here")
+    })
     it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
       // The user's V3/V5 invariant: when neither party changes their position
       // in a subsequent turn, the diff should look identical to the previous