@createiq/htmldiff 1.0.4 → 1.0.5-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/dist/HtmlDiff.cjs +68 -41
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +39 -17
- package/dist/HtmlDiff.d.mts +39 -17
- package/dist/HtmlDiff.mjs +68 -41
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +8 -8
- package/src/HtmlDiff.ts +105 -64
- package/test/HtmlDiff.spec.ts +118 -0
package/src/HtmlDiff.ts
CHANGED
|
@@ -80,8 +80,17 @@ export default class HtmlDiff {
|
|
|
80
80
|
/** Maps content-word index → original word index */
|
|
81
81
|
private oldContentToOriginal: number[] | null = null
|
|
82
82
|
private newContentToOriginal: number[] | null = null
|
|
83
|
-
/**
|
|
83
|
+
/**
|
|
84
|
+
* Tracks the next unwritten word index in oldWords/newWords. Mutated only by
|
|
85
|
+
* {@link sliceOriginalWordsForOp} (each op reads a slice and advances its cursor).
|
|
86
|
+
* Advances monotonically. Used so:
|
|
87
|
+
* - subsequent equal/delete ops know where in old to resume from
|
|
88
|
+
* - subsequent insert ops know where in new to resume from
|
|
89
|
+
* The two cursors are independent: equal/delete output from old and advance the old
|
|
90
|
+
* cursor; insert outputs from new and advances the new cursor (equal ops also advance it, emitting nothing from new).
|
|
91
|
+
*/
|
|
84
92
|
private lastOriginalOldOutputIndex = 0
|
|
93
|
+
private lastOriginalNewOutputIndex = 0
|
|
85
94
|
private matchGranularity = 0
|
|
86
95
|
private blockExpressions: RegExp[] = []
|
|
87
96
|
|
|
@@ -185,23 +194,18 @@ export default class HtmlDiff {
|
|
|
185
194
|
}
|
|
186
195
|
|
|
187
196
|
/**
|
|
188
|
-
*
|
|
189
|
-
*
|
|
190
|
-
*
|
|
197
|
+
* Builds "content projections" — word arrays with structural wrapper tags stripped — when
|
|
198
|
+
* structural normalization is appropriate for these inputs. The diff algorithm operates on
|
|
199
|
+
* the projections so wrapper-tag differences (e.g. `<p>` vs `<div>`) don't appear as content
|
|
200
|
+
* changes; structural tags are then folded back in at output time.
|
|
191
201
|
*/
|
|
192
202
|
private buildContentProjections() {
|
|
193
|
-
|
|
194
|
-
// If structural tags are the same, the normal diff works fine and is simpler.
|
|
195
|
-
if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) {
|
|
196
|
-
return
|
|
197
|
-
}
|
|
203
|
+
if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) return
|
|
198
204
|
|
|
199
205
|
const oldProjection = HtmlDiff.createContentProjection(this.oldWords)
|
|
200
206
|
const newProjection = HtmlDiff.createContentProjection(this.newWords)
|
|
201
207
|
|
|
202
|
-
|
|
203
|
-
// that's a genuine addition/deletion, not a re-wrapping scenario.
|
|
204
|
-
if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) {
|
|
208
|
+
if (!HtmlDiff.shouldUseContentProjections(this.oldWords, this.newWords, oldProjection, newProjection)) {
|
|
205
209
|
return
|
|
206
210
|
}
|
|
207
211
|
|
|
@@ -211,6 +215,32 @@ export default class HtmlDiff {
|
|
|
211
215
|
this.newContentToOriginal = newProjection.contentToOriginal
|
|
212
216
|
}
|
|
213
217
|
|
|
218
|
+
/**
|
|
219
|
+
* Decides whether structural normalization should be activated for this pair of inputs.
|
|
220
|
+
* Each clause is a distinct correctness or fitness check — extend by adding a named
|
|
221
|
+
* sub-predicate rather than chaining ad-hoc conditions.
|
|
222
|
+
*/
|
|
223
|
+
private static shouldUseContentProjections(
|
|
224
|
+
oldWords: string[],
|
|
225
|
+
newWords: string[],
|
|
226
|
+
oldProjection: { contentWords: string[]; contentToOriginal: number[] },
|
|
227
|
+
newProjection: { contentWords: string[]; contentToOriginal: number[] }
|
|
228
|
+
): boolean {
|
|
229
|
+
// One side has no content at all: that's a genuine addition/deletion, not a wrapper rename.
|
|
230
|
+
// Normalization would mis-attribute the wrappers as part of the diff.
|
|
231
|
+
if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) return false
|
|
232
|
+
|
|
233
|
+
// Asymmetric structural state: one side has no structural wrappers at all (e.g. plain text
|
|
234
|
+
// vs. wrapped HTML). Normalization would force the equal output to use the unwrapped side's
|
|
235
|
+
// (missing) structure and emit dangling closing tags from the wrapped side. The plain
|
|
236
|
+
// word-level diff handles this correctly without normalization.
|
|
237
|
+
const oldHasStructuralTags = oldProjection.contentWords.length < oldWords.length
|
|
238
|
+
const newHasStructuralTags = newProjection.contentWords.length < newWords.length
|
|
239
|
+
if (oldHasStructuralTags !== newHasStructuralTags) return false
|
|
240
|
+
|
|
241
|
+
return true
|
|
242
|
+
}
|
|
243
|
+
|
|
214
244
|
/**
|
|
215
245
|
* Tags that commonly serve as content wrappers and may change structurally
|
|
216
246
|
* without affecting the actual content. Only these tags are stripped during
|
|
@@ -224,6 +254,11 @@ export default class HtmlDiff {
|
|
|
224
254
|
return HtmlDiff.WrapperTags.has(tagName)
|
|
225
255
|
}
|
|
226
256
|
|
|
257
|
+
/** True when the word is a structural opening tag (e.g. `<p>`, `<div>`). */
|
|
258
|
+
private static isOpeningStructuralTag(word: string): boolean {
|
|
259
|
+
return HtmlDiff.isStructuralTag(word) && !word.startsWith('</')
|
|
260
|
+
}
|
|
261
|
+
|
|
227
262
|
/**
|
|
228
263
|
* Returns true if words between structural tags are just whitespace (indentation).
|
|
229
264
|
*/
|
|
@@ -303,84 +338,90 @@ export default class HtmlDiff {
|
|
|
303
338
|
}
|
|
304
339
|
|
|
305
340
|
private processInsertOperation(operation: Operation, cssClass: string) {
|
|
306
|
-
const words = this.
|
|
307
|
-
? this.
|
|
341
|
+
const words = this.usingContentProjections()
|
|
342
|
+
? this.sliceOriginalWordsForOp('new', operation.startInNew, operation.endInNew)
|
|
308
343
|
: this.newWords.slice(operation.startInNew, operation.endInNew)
|
|
309
344
|
this.insertTag(HtmlDiff.InsTag, cssClass, words)
|
|
310
345
|
}
|
|
311
346
|
|
|
312
347
|
private processDeleteOperation(operation: Operation, cssClass: string) {
|
|
313
|
-
const words = this.
|
|
314
|
-
? this.
|
|
348
|
+
const words = this.usingContentProjections()
|
|
349
|
+
? this.sliceOriginalWordsForOp('old', operation.startInOld, operation.endInOld)
|
|
315
350
|
: this.oldWords.slice(operation.startInOld, operation.endInOld)
|
|
316
351
|
this.insertTag(HtmlDiff.DelTag, cssClass, words)
|
|
317
|
-
|
|
318
|
-
// Advance the tracking index past the deleted range so subsequent equal operations
|
|
319
|
-
// don't re-include structural tags from the deleted section.
|
|
320
|
-
if (this.oldContentToOriginal && operation.endInOld > 0) {
|
|
321
|
-
const lastDeletedOrigIdx = this.oldContentToOriginal[operation.endInOld - 1]
|
|
322
|
-
this.lastOriginalOldOutputIndex = Math.max(this.lastOriginalOldOutputIndex, lastDeletedOrigIdx + 1)
|
|
323
|
-
}
|
|
324
352
|
}
|
|
325
353
|
|
|
326
354
|
private processEqualOperation(operation: Operation) {
|
|
327
|
-
if (this.
|
|
328
|
-
//
|
|
329
|
-
const result = this.
|
|
355
|
+
if (this.usingContentProjections()) {
|
|
356
|
+
// Output from old to preserve old's HTML structure for the matched content.
|
|
357
|
+
const result = this.sliceOriginalWordsForOp('old', operation.startInOld, operation.endInOld)
|
|
330
358
|
this.content.push(result.join(''))
|
|
359
|
+
|
|
360
|
+
// Advance new-side tracking past the equivalent range in new so the next insert op
|
|
361
|
+
// resumes from the correct position. We compute new's range with the same rule used
|
|
362
|
+
// for old (rather than mirroring old's count) so the two sides are independently sound
|
|
363
|
+
// when their structural tags don't perfectly parallel each other.
|
|
364
|
+
this.sliceOriginalWordsForOp('new', operation.startInNew, operation.endInNew)
|
|
331
365
|
} else {
|
|
332
366
|
const result = this.newWords.slice(operation.startInNew, operation.endInNew)
|
|
333
367
|
this.content.push(result.join(''))
|
|
334
368
|
}
|
|
335
369
|
}
|
|
336
370
|
|
|
337
|
-
/**
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
*/
|
|
341
|
-
private getOriginalOldWords(contentStart: number, contentEnd: number): string[] {
|
|
342
|
-
if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd)
|
|
343
|
-
const result: string[] = []
|
|
344
|
-
for (let i = contentStart; i < contentEnd; i++) {
|
|
345
|
-
result.push(this.oldWords[this.oldContentToOriginal[i]])
|
|
346
|
-
}
|
|
347
|
-
return result
|
|
371
|
+
/** True when content projections are active for both sides — i.e. structural normalization is in effect. */
|
|
372
|
+
private usingContentProjections(): boolean {
|
|
373
|
+
return this.oldContentToOriginal !== null && this.newContentToOriginal !== null
|
|
348
374
|
}
|
|
349
375
|
|
|
350
376
|
/**
|
|
351
|
-
*
|
|
352
|
-
*
|
|
377
|
+
* Returns the slice of original (old or new) words covering a content-index range,
|
|
378
|
+
* including the structural tags that surround the content. Advances the side's cursor
|
|
379
|
+
* past the slice so the next op resumes correctly.
|
|
380
|
+
*
|
|
381
|
+
* The slice extends:
|
|
382
|
+
* - LEADING: from the side's cursor (or the first content word's original index,
|
|
383
|
+
* whichever is smaller) so structural tags that precede the first content word
|
|
384
|
+
* are picked up by this op rather than left orphaned.
|
|
385
|
+
* - TRAILING (non-last range): from just after the last content word, including
|
|
386
|
+
* closing structural tags that close *this* op's paragraphs, but stopping at
|
|
387
|
+
* the first opening structural tag — that opening tag belongs to the next
|
|
388
|
+
* op's paragraph and would otherwise be emitted twice.
|
|
389
|
+
* - TRAILING (last range): all the way to the end of words, since there is no next
|
|
390
|
+
* op to claim the trailing tags.
|
|
353
391
|
*/
|
|
354
|
-
private
|
|
355
|
-
|
|
356
|
-
const
|
|
357
|
-
for (let i = contentStart; i < contentEnd; i++) {
|
|
358
|
-
result.push(this.newWords[this.newContentToOriginal[i]])
|
|
359
|
-
}
|
|
360
|
-
return result
|
|
361
|
-
}
|
|
392
|
+
private sliceOriginalWordsForOp(side: 'old' | 'new', contentStart: number, contentEnd: number): string[] {
|
|
393
|
+
const words = side === 'old' ? this.oldWords : this.newWords
|
|
394
|
+
const contentToOriginal = side === 'old' ? this.oldContentToOriginal : this.newContentToOriginal
|
|
362
395
|
|
|
363
|
-
|
|
364
|
-
* Gets original old words for a content-index range, INCLUDING structural tags and whitespace
|
|
365
|
-
* between the content words (used for equal operations to preserve old HTML structure).
|
|
366
|
-
*/
|
|
367
|
-
private getOriginalOldWordsWithStructure(contentStart: number, contentEnd: number): string[] {
|
|
368
|
-
if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd)
|
|
396
|
+
if (!contentToOriginal) return words.slice(contentStart, contentEnd)
|
|
369
397
|
if (contentStart >= contentEnd) return []
|
|
370
398
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
const
|
|
375
|
-
|
|
399
|
+
const firstContentOrigIdx = contentToOriginal[contentStart]
|
|
400
|
+
const lastContentOrigIdx = contentToOriginal[contentEnd - 1]
|
|
401
|
+
const cursor = side === 'old' ? this.lastOriginalOldOutputIndex : this.lastOriginalNewOutputIndex
|
|
402
|
+
const origStart = Math.min(cursor, firstContentOrigIdx)
|
|
403
|
+
|
|
404
|
+
let origEnd: number
|
|
405
|
+
if (contentEnd < contentToOriginal.length) {
|
|
406
|
+
// Non-last range: walk trailing tags after the last content word, stopping at the
|
|
407
|
+
// first opening structural tag so it can be emitted by the next op.
|
|
408
|
+
const limit = contentToOriginal[contentEnd]
|
|
409
|
+
origEnd = lastContentOrigIdx + 1
|
|
410
|
+
while (origEnd < limit && !HtmlDiff.isOpeningStructuralTag(words[origEnd])) {
|
|
411
|
+
origEnd++
|
|
412
|
+
}
|
|
413
|
+
} else {
|
|
414
|
+
// Last range: include everything to the end.
|
|
415
|
+
origEnd = words.length
|
|
416
|
+
}
|
|
376
417
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
418
|
+
if (side === 'old') {
|
|
419
|
+
this.lastOriginalOldOutputIndex = origEnd
|
|
420
|
+
} else {
|
|
421
|
+
this.lastOriginalNewOutputIndex = origEnd
|
|
422
|
+
}
|
|
381
423
|
|
|
382
|
-
|
|
383
|
-
return this.oldWords.slice(origStart, origEnd)
|
|
424
|
+
return words.slice(origStart, origEnd)
|
|
384
425
|
}
|
|
385
426
|
|
|
386
427
|
/**
|
package/test/HtmlDiff.spec.ts
CHANGED
|
@@ -339,4 +339,122 @@ describe('HtmlDiff', () => {
|
|
|
339
339
|
)
|
|
340
340
|
).toEqual(`<div data-html="bar">Some<ins class='diffins'> different</ins> text here</div>`)
|
|
341
341
|
})
|
|
342
|
+
|
|
343
|
+
it('should keep added paragraphs as separate <p> blocks instead of concatenating their content', () => {
|
|
344
|
+
// When new content adds extra paragraphs alongside an existing one, structural
|
|
345
|
+
// normalization used to strip the wrappers and emit the added content as a
|
|
346
|
+
// single concatenated <ins> ("BoopShoop"). Each added paragraph should remain
|
|
347
|
+
// its own <p> so they render on separate lines.
|
|
348
|
+
expect(HtmlDiff.execute('<p>shared</p>', '<p>shared</p><p>Boop</p><p>Shoop</p>')).toEqual(
|
|
349
|
+
`<p>shared</p><p><ins class='diffins'>Boop</ins></p><p><ins class='diffins'>Shoop</ins></p>`
|
|
350
|
+
)
|
|
351
|
+
})
|
|
352
|
+
|
|
353
|
+
it('should keep removed paragraphs as separate <p> blocks instead of concatenating their content', () => {
|
|
354
|
+
expect(HtmlDiff.execute('<p>shared</p><p>Boop</p><p>Shoop</p>', '<p>shared</p>')).toEqual(
|
|
355
|
+
`<p>shared</p><p><del class='diffdel'>Boop</del></p><p><del class='diffdel'>Shoop</del></p>`
|
|
356
|
+
)
|
|
357
|
+
})
|
|
358
|
+
|
|
359
|
+
it('should keep an added paragraph distinct from the preceding paragraph', () => {
|
|
360
|
+
// The compare-bubble regression: a single added <p>Boop</p> after a modified
|
|
361
|
+
// paragraph used to collapse onto the same line as "discovered." because the
|
|
362
|
+
// <p> wrapper was stripped from the inserted content.
|
|
363
|
+
expect(
|
|
364
|
+
HtmlDiff.execute(
|
|
365
|
+
'<p>Shared content has extra removed has been discovered.</p>',
|
|
366
|
+
'<p>Shared content has been discovered.</p><p>Boop</p>'
|
|
367
|
+
)
|
|
368
|
+
).toEqual(
|
|
369
|
+
`<p>Shared content<del class='diffdel'> has extra removed</del> has been discovered.</p><p><ins class='diffins'>Boop</ins></p>`
|
|
370
|
+
)
|
|
371
|
+
})
|
|
372
|
+
|
|
373
|
+
it('should preserve new wrappers when old is plain text and new is wrapped', () => {
|
|
374
|
+
// Asymmetric structure case: when one side has no structural wrappers at all,
|
|
375
|
+
// normalization would produce dangling tags. We disable it for this case so the
|
|
376
|
+
// word-level diff can correctly emit new's structural wrappers around the diff.
|
|
377
|
+
expect(HtmlDiff.execute('X with extra', '<p>X</p><p>Boop</p>')).toEqual(
|
|
378
|
+
`<p>X<del class='diffmod'> with extra</del></p><p><ins class='diffmod'>Boop</ins></p>`
|
|
379
|
+
)
|
|
380
|
+
})
|
|
381
|
+
|
|
382
|
+
it('should preserve new wrappers when old is plain text spanning multiple paragraphs in new', () => {
|
|
383
|
+
expect(
|
|
384
|
+
HtmlDiff.execute(
|
|
385
|
+
'Shared content has extra removed has been discovered.',
|
|
386
|
+
'<div><p>Shared content has been discovered.</p><p>Boop</p></div>'
|
|
387
|
+
)
|
|
388
|
+
).toEqual(
|
|
389
|
+
`<div><p>Shared content<del class='diffdel'> has extra removed</del> has been discovered.</p><p><ins class='diffins'>Boop</ins></p></div>`
|
|
390
|
+
)
|
|
391
|
+
})
|
|
392
|
+
|
|
393
|
+
it('should keep a paragraph added in the middle as its own block', () => {
|
|
394
|
+
expect(HtmlDiff.execute('<p>A</p><p>C</p>', '<p>A</p><p>B</p><p>C</p>')).toEqual(
|
|
395
|
+
`<p>A</p><p><ins class='diffins'>B</ins></p><p>C</p>`
|
|
396
|
+
)
|
|
397
|
+
})
|
|
398
|
+
|
|
399
|
+
it('should keep a paragraph added at the start as its own block', () => {
|
|
400
|
+
expect(HtmlDiff.execute('<p>B</p>', '<p>A</p><p>B</p>')).toEqual(`<p><ins class='diffins'>A</ins></p><p>B</p>`)
|
|
401
|
+
})
|
|
402
|
+
|
|
403
|
+
it('should keep multiple paragraphs added at the start as separate blocks', () => {
|
|
404
|
+
expect(HtmlDiff.execute('<p>C</p>', '<p>A</p><p>B</p><p>C</p>')).toEqual(
|
|
405
|
+
`<p><ins class='diffins'>A</ins></p><p><ins class='diffins'>B</ins></p><p>C</p>`
|
|
406
|
+
)
|
|
407
|
+
})
|
|
408
|
+
|
|
409
|
+
it('should keep a paragraph removed from the start as its own block', () => {
|
|
410
|
+
expect(HtmlDiff.execute('<p>A</p><p>B</p>', '<p>B</p>')).toEqual(`<p><del class='diffdel'>A</del></p><p>B</p>`)
|
|
411
|
+
})
|
|
412
|
+
|
|
413
|
+
it('should keep a paragraph removed from the middle as its own block', () => {
|
|
414
|
+
expect(HtmlDiff.execute('<p>A</p><p>B</p><p>C</p>', '<p>A</p><p>C</p>')).toEqual(
|
|
415
|
+
`<p>A</p><p><del class='diffdel'>B</del></p><p>C</p>`
|
|
416
|
+
)
|
|
417
|
+
})
|
|
418
|
+
|
|
419
|
+
it('should keep paragraph boundaries on a replace operation that spans multiple paragraphs', () => {
|
|
420
|
+
// The diff algorithm treats this as a single Replace block (delete A,B then insert X,Y,Z)
|
|
421
|
+
// rather than aligning per-paragraph; we accept that as the price of a single-pass algorithm
|
|
422
|
+
// but verify each paragraph still renders as its own <p> rather than concatenating.
|
|
423
|
+
expect(HtmlDiff.execute('<p>A</p><p>B</p>', '<p>X</p><p>Y</p><p>Z</p>')).toEqual(
|
|
424
|
+
`<p><del class='diffmod'>A</del></p><p><del class='diffmod'>B</del></p><p><ins class='diffmod'>X</ins></p><p><ins class='diffmod'>Y</ins></p><p><ins class='diffmod'>Z</ins></p>`
|
|
425
|
+
)
|
|
426
|
+
})
|
|
427
|
+
|
|
428
|
+
it('should keep added paragraphs containing inline formatting as their own blocks', () => {
|
|
429
|
+
expect(HtmlDiff.execute('<p>shared</p>', '<p>shared</p><p><strong>bold added</strong></p>')).toEqual(
|
|
430
|
+
`<p>shared</p><p><strong><ins class='diffins'>bold added</ins></strong></p>`
|
|
431
|
+
)
|
|
432
|
+
})
|
|
433
|
+
|
|
434
|
+
it('should preserve a paragraph added inside a list item', () => {
|
|
435
|
+
expect(HtmlDiff.execute('<ol><li><p>shared</p></li></ol>', '<ol><li><p>shared</p><p>added</p></li></ol>')).toEqual(
|
|
436
|
+
`<ol><li><p>shared</p><p><ins class='diffins'>added</ins></p></li></ol>`
|
|
437
|
+
)
|
|
438
|
+
})
|
|
439
|
+
|
|
440
|
+
it('should preserve old wrapper structure when old is wrapped and new is plain text', () => {
|
|
441
|
+
// Inverse of the plain-text-old / wrapped-new asymmetric case. With one side unwrapped,
|
|
442
|
+
// the asymmetric guardrail should disable structural normalization. The word-level diff
|
|
443
|
+
// emits old's paragraph structure with deletions in their original paragraphs and the
|
|
444
|
+
// new content appended inline.
|
|
445
|
+
expect(HtmlDiff.execute('<p>X</p><p>Boop</p>', 'X with extra')).toEqual(
|
|
446
|
+
`<p>X</p><p><del class='diffmod'>Boop</del></p><ins class='diffmod'> with extra</ins>`
|
|
447
|
+
)
|
|
448
|
+
})
|
|
449
|
+
|
|
450
|
+
it.each([
|
|
451
|
+
['section', '<section>X</section>', '<section>X</section><section>Boop</section>'],
|
|
452
|
+
['article', '<article>X</article>', '<article>X</article><article>Boop</article>'],
|
|
453
|
+
['aside', '<aside>X</aside>', '<aside>X</aside><aside>Boop</aside>'],
|
|
454
|
+
['nav', '<nav>X</nav>', '<nav>X</nav><nav>Boop</nav>'],
|
|
455
|
+
])('should treat <%s> as a structural wrapper for paragraph-add diffs', (tag, oldText, newText) => {
|
|
456
|
+
expect(HtmlDiff.execute(oldText, newText)).toEqual(
|
|
457
|
+
`<${tag}>X</${tag}><${tag}><ins class='diffins'>Boop</ins></${tag}>`
|
|
458
|
+
)
|
|
459
|
+
})
|
|
342
460
|
})
|