@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +38 -14
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +10 -0
- package/dist/HtmlDiff.d.mts +10 -0
- package/dist/HtmlDiff.mjs +38 -14
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +57 -21
- package/src/ThreeWayTable.ts +15 -1
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +11 -0
- package/test/HtmlDiff.threeWay.tables.spec.ts +26 -0
package/package.json
CHANGED
package/src/HtmlDiff.ts
CHANGED
|
@@ -162,7 +162,22 @@ export default class HtmlDiff {
|
|
|
162
162
|
// constructor overload that would re-leak the parameter we just hid.
|
|
163
163
|
private tablePreprocessDepth = 0
|
|
164
164
|
|
|
165
|
-
|
|
165
|
+
/**
|
|
166
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
167
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
168
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
169
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
170
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
171
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
172
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
173
|
+
* handler for the split logic.
|
|
174
|
+
*/
|
|
175
|
+
private specialTagDiffStack: Array<{
|
|
176
|
+
tag: string
|
|
177
|
+
styledTagNames: string
|
|
178
|
+
cssClass: string
|
|
179
|
+
metadata: WrapMetadata | undefined
|
|
180
|
+
}> = []
|
|
166
181
|
private newWords: string[] = []
|
|
167
182
|
private oldWords: string[] = []
|
|
168
183
|
/**
|
|
@@ -827,8 +842,13 @@ export default class HtmlDiff {
|
|
|
827
842
|
// if there are nonTags, the index of the last tag is the index before the first nonTag.
|
|
828
843
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1
|
|
829
844
|
|
|
830
|
-
|
|
831
|
-
|
|
845
|
+
// Pre-injection sits BEFORE the extracted tag-block content (used
|
|
846
|
+
// by closing tags so `</ins></strong>` reads left-to-right).
|
|
847
|
+
// Post-injection sits AFTER (used by opening tags so the rendered
|
|
848
|
+
// order is `<strong><ins ...>` and by the overlap-split case so
|
|
849
|
+
// the re-opened `<ins>`s sit AFTER the actual closing tag).
|
|
850
|
+
let preInject = ''
|
|
851
|
+
let postInject = ''
|
|
832
852
|
|
|
833
853
|
// handle opening tag
|
|
834
854
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
@@ -840,10 +860,11 @@ export default class HtmlDiff {
|
|
|
840
860
|
}
|
|
841
861
|
const styledTagNames = Array.from(tagNames).join(' ')
|
|
842
862
|
|
|
843
|
-
this.specialTagDiffStack.push(words[0])
|
|
844
863
|
// Carry the caller's metadata into the formatting-tag wrapper so
|
|
845
864
|
// a 3-way author tag survives a `<strong>`/`<em>` content edit.
|
|
846
|
-
|
|
865
|
+
const styledCssClass = `mod ${styledTagNames}`
|
|
866
|
+
this.specialTagDiffStack.push({ tag: words[0], styledTagNames, cssClass: styledCssClass, metadata })
|
|
867
|
+
postInject = `<ins${Utils.composeTagAttributes(styledCssClass, metadata ?? {})}>`
|
|
847
868
|
if (tag === HtmlDiff.DelTag) {
|
|
848
869
|
words.shift()
|
|
849
870
|
|
|
@@ -855,7 +876,6 @@ export default class HtmlDiff {
|
|
|
855
876
|
}
|
|
856
877
|
// handle closing tag
|
|
857
878
|
else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
858
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop()
|
|
859
879
|
// For delete operations: when the tag block contains a mix of formatting and
|
|
860
880
|
// non-formatting closing tags (e.g. </strong></div>), compare against the first
|
|
861
881
|
// closing tag (the formatting one) rather than the last tag in the block.
|
|
@@ -870,19 +890,39 @@ export default class HtmlDiff {
|
|
|
870
890
|
tagIndexToCompare = 0
|
|
871
891
|
}
|
|
872
892
|
}
|
|
873
|
-
const openingAndClosingTagsMatch =
|
|
874
|
-
!!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])
|
|
875
893
|
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
894
|
+
// Search the stack for a matching opener (LIFO). When the match
|
|
895
|
+
// is the top entry, this is the normal balanced case and we
|
|
896
|
+
// emit a single `</ins>` before the closing tag. When the match
|
|
897
|
+
// is below an unmatched opener — i.e. another formatting wrap
|
|
898
|
+
// opened after it but hasn't been closed yet — the wraps
|
|
899
|
+
// overlap in source order, which has no valid LIFO HTML
|
|
900
|
+
// expression. Resolve by SPLITTING the wraps: close everything
|
|
901
|
+
// above the match (their `<ins>`s and the match's `<ins>`), then
|
|
902
|
+
// re-open the above wraps with fresh `<ins>` tags AFTER the
|
|
903
|
+
// closing tag emits. The above wraps continue to apply until
|
|
904
|
+
// their own closing tag arrives.
|
|
905
|
+
const closingTagName = Utils.getTagName(words[tagIndexToCompare])
|
|
906
|
+
let matchIdx = -1
|
|
907
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) {
|
|
908
|
+
if (Utils.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
909
|
+
matchIdx = i
|
|
910
|
+
break
|
|
911
|
+
}
|
|
879
912
|
}
|
|
880
913
|
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
914
|
+
if (matchIdx >= 0) {
|
|
915
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1)
|
|
916
|
+
this.specialTagDiffStack.pop() // pop the matched entry
|
|
917
|
+
// One `</ins>` per above entry, then one for the match itself.
|
|
918
|
+
preInject = '</ins>'.repeat(aboveEntries.length + 1)
|
|
919
|
+
for (const entry of aboveEntries) {
|
|
920
|
+
postInject += `<ins${Utils.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`
|
|
921
|
+
this.specialTagDiffStack.push(entry) // their wrap continues via the new <ins>
|
|
922
|
+
}
|
|
885
923
|
}
|
|
924
|
+
// No match in stack — orphan closing tag, drop the `<ins>` work
|
|
925
|
+
// and just let the tag itself flow through extractConsecutiveWords.
|
|
886
926
|
|
|
887
927
|
if (tag === HtmlDiff.DelTag) {
|
|
888
928
|
words.shift()
|
|
@@ -893,7 +933,7 @@ export default class HtmlDiff {
|
|
|
893
933
|
}
|
|
894
934
|
}
|
|
895
935
|
|
|
896
|
-
if (words.length === 0 && specialCaseTagInjection.length === 0) {
|
|
936
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) {
|
|
897
937
|
break
|
|
898
938
|
}
|
|
899
939
|
|
|
@@ -909,11 +949,7 @@ export default class HtmlDiff {
|
|
|
909
949
|
!HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase())
|
|
910
950
|
: Utils.isTag
|
|
911
951
|
|
|
912
|
-
|
|
913
|
-
this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(''))
|
|
914
|
-
} else {
|
|
915
|
-
this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join('') + specialCaseTagInjection)
|
|
916
|
-
}
|
|
952
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join('') + postInject)
|
|
917
953
|
|
|
918
954
|
if (words.length === 0) continue
|
|
919
955
|
|
package/src/ThreeWayTable.ts
CHANGED
|
@@ -303,7 +303,21 @@ function preprocessByContent(
|
|
|
303
303
|
return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
|
|
304
304
|
}
|
|
305
305
|
|
|
306
|
-
|
|
306
|
+
// Positional pairing is the strict-default for three-way table merge:
|
|
307
|
+
// when all three inputs have the same number of tables in the same
|
|
308
|
+
// order, we pair them by index and let `diffTableThreeWay` handle
|
|
309
|
+
// per-table cell/row level differences. The similarity guard below
|
|
310
|
+
// only kicks in to *reject* positional alignment when a pair is
|
|
311
|
+
// SO dissimilar that it's near-certainly a table reorder/rename
|
|
312
|
+
// where content-LCS pairing would be materially better. The
|
|
313
|
+
// threshold is intentionally low — the 2-way path has no such guard
|
|
314
|
+
// and pairs purely by index (its `diffTable` falls back through
|
|
315
|
+
// same-dimension → equal-row-count → row-LCS → whole-table on its
|
|
316
|
+
// own), so the three-way path was stricter than its sibling and
|
|
317
|
+
// silently dropped to whole-table del+ins for legitimate edits
|
|
318
|
+
// like "rename one column and tweak its values". Aligning the
|
|
319
|
+
// threshold here keeps the two-way and three-way paths in step.
|
|
320
|
+
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.15
|
|
307
321
|
|
|
308
322
|
function positionallyAligned(
|
|
309
323
|
genesis: string,
|
package/test/HtmlDiff.spec.ts
CHANGED
|
@@ -48,6 +48,21 @@ describe('HtmlDiff', () => {
|
|
|
48
48
|
'Some formatted text',
|
|
49
49
|
"Some <ins class='mod strong i'>formatted</ins> text",
|
|
50
50
|
],
|
|
51
|
+
// Overlapping formatting wraps — old wraps a word in <strong>, new wraps the same
|
|
52
|
+
// word in <u>. The wraps cross (mod-strong opens before mod-u, but the </strong>
|
|
53
|
+
// closing arrives before </u>), so emission must split the inner wrap to keep
|
|
54
|
+
// HTML well-formed. Regression: previously left mod-strong unclosed and the
|
|
55
|
+
// 3-way path threw on the unbalanced stack.
|
|
56
|
+
[
|
|
57
|
+
'<strong>three</strong>',
|
|
58
|
+
'<u>three</u>',
|
|
59
|
+
"<ins class='mod strong'><u><ins class='mod u'>three</ins></ins><ins class='mod u'></ins></u>",
|
|
60
|
+
],
|
|
61
|
+
[
|
|
62
|
+
'a <strong>three</strong> b',
|
|
63
|
+
'a <u>three</u> b',
|
|
64
|
+
"a <ins class='mod strong'><u><ins class='mod u'>three</ins></ins><ins class='mod u'></ins></u> b",
|
|
65
|
+
],
|
|
51
66
|
[
|
|
52
67
|
'<table><tr><td>col1</td><td>col2</td></tr><tr><td>Data 1</td><td>Data 2</td></tr></table>',
|
|
53
68
|
'<table><tr><td>col1</td><td>col2</td></tr></table>',
|
|
@@ -144,6 +144,17 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
144
144
|
expect(out).toMatch(/<p>First paragraph.*data-author='cp'.*<\/p>/)
|
|
145
145
|
expect(out).toMatch(/<p>Second paragraph.*data-author='me'.*<\/p>/)
|
|
146
146
|
})
|
|
147
|
+
|
|
148
|
+
it('overlapping formatting wraps from each author do not unbalance the emission stack', () => {
|
|
149
|
+
// Genesis: plain "three". CP wrapped it in <strong>, Me in <u>. The
|
|
150
|
+
// mod-strong (cp) and mod-u (me) wraps cross: <strong> opens before
|
|
151
|
+
// <u>, but </strong> arrives before </u>. The emitter must split
|
|
152
|
+
// the inner wrap so the output stays well-formed instead of
|
|
153
|
+
// throwing an unbalanced-stack error.
|
|
154
|
+
expect(HtmlDiff.executeThreeWay('three', '<strong>three</strong>', '<u>three</u>')).toBe(
|
|
155
|
+
"<strong><ins class='mod strong cp' data-author='cp'><u><ins class='mod u me' data-author='me'>three</ins></ins></strong><ins class='mod u me' data-author='me'></ins></u>"
|
|
156
|
+
)
|
|
157
|
+
})
|
|
147
158
|
})
|
|
148
159
|
|
|
149
160
|
describe('options pass-through', () => {
|
|
@@ -298,4 +298,30 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
298
298
|
expect(HtmlDiff.executeThreeWay('<p>a</p>', '<p>a</p>', '<p>a</p>')).toBe('<p>a</p>')
|
|
299
299
|
})
|
|
300
300
|
})
|
|
301
|
+
|
|
302
|
+
describe('positional pairing under moderate dissimilarity', () => {
|
|
303
|
+
it('column rename + value rewrite still routes through cell-level diff (not whole-table del+ins)', () => {
|
|
304
|
+
// Real-world regression: cp renamed a column ("Form/Document/Certificate"
|
|
305
|
+
// → "Extra column") and replaced the values in that column with short
|
|
306
|
+
// tokens. Word-level Jaccard between the genesis table and cp's edited
|
|
307
|
+
// table drops to ~0.38 — under the 0.5 threshold the three-way path
|
|
308
|
+
// used to apply, which kicked the diff into multi-table content-LCS
|
|
309
|
+
// and produced whole-table del+ins (the cp's CP-bubble showed the
|
|
310
|
+
// entire old table struck through and the entire new table inserted).
|
|
311
|
+
// 2-way had no such guard and produced a cell-level diff for the same
|
|
312
|
+
// inputs; lowering the 3-way threshold brings the two paths in step.
|
|
313
|
+
const genesis =
|
|
314
|
+
'<table><tr><td>A</td><td>Form/Document/Certificate</td><td>Date</td></tr><tr><td>Party A</td><td>IRS W-8</td><td>On execution</td></tr></table>'
|
|
315
|
+
const cp =
|
|
316
|
+
'<table><tr><td>A</td><td>Extra column</td><td>Date</td></tr><tr><td>Party A</td><td>Yes</td><td>On execution</td></tr></table>'
|
|
317
|
+
const me = genesis
|
|
318
|
+
const out = HtmlDiff.executeThreeWay(genesis, cp, me)
|
|
319
|
+
// Expect cell-level cp attribution INSIDE the table cells, NOT a
|
|
320
|
+
// whole-table del+ins wrapping the entire <table>.
|
|
321
|
+
expect(out).not.toMatch(/<del[^>]*><table/)
|
|
322
|
+
expect(out).toMatch(/data-author='cp'/)
|
|
323
|
+
expect(out).toContain('Extra column')
|
|
324
|
+
expect(out).toContain('Form/Document/Certificate')
|
|
325
|
+
})
|
|
326
|
+
})
|
|
301
327
|
})
|