@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -19
- package/dist/HtmlDiff.cjs +609 -438
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +89 -16
- package/dist/HtmlDiff.d.mts +89 -16
- package/dist/HtmlDiff.mjs +604 -438
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +218 -74
- package/src/ThreeWayDiff.ts +220 -127
- package/src/ThreeWayTable.ts +549 -491
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +316 -92
- package/test/HtmlDiff.threeWay.tables.spec.ts +200 -196
- package/test/Utils.spec.ts +3 -3
package/package.json
CHANGED
package/src/HtmlDiff.ts
CHANGED
|
@@ -71,6 +71,40 @@ export interface AnalyzeResult {
|
|
|
71
71
|
*/
|
|
72
72
|
export type ThreeWayOptions = AnalyzeOptions
|
|
73
73
|
|
|
74
|
+
/**
|
|
75
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
76
|
+
* track-changes rendering for legal-document rewrites.
|
|
77
|
+
*
|
|
78
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
79
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
80
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
81
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
82
|
+
* dramatically more readable for legal text.
|
|
83
|
+
*
|
|
84
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
85
|
+
* Commercial One CP, May 2026):
|
|
86
|
+
* - short edits (typo / one-word insert): output identical to
|
|
87
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
88
|
+
* trivially clears the bar;
|
|
89
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
90
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
91
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
92
|
+
* Word's 1+1 and a major readability win;
|
|
93
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
94
|
+
* formatting changes into a single block — too aggressive.
|
|
95
|
+
*
|
|
96
|
+
* Consumers rendering legal documents should spread this into their
|
|
97
|
+
* options:
|
|
98
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
99
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
100
|
+
*
|
|
101
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
102
|
+
* keep the bare default.
|
|
103
|
+
*/
|
|
104
|
+
export const WORD_ALIGNED_OPTIONS: AnalyzeOptions = {
|
|
105
|
+
orphanMatchThreshold: 0.25,
|
|
106
|
+
}
|
|
107
|
+
|
|
74
108
|
export default class HtmlDiff {
|
|
75
109
|
/**
|
|
76
110
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -162,7 +196,22 @@ export default class HtmlDiff {
|
|
|
162
196
|
// constructor overload that would re-leak the parameter we just hid.
|
|
163
197
|
private tablePreprocessDepth = 0
|
|
164
198
|
|
|
165
|
-
|
|
199
|
+
/**
|
|
200
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
201
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
202
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
203
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
204
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
205
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
206
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
207
|
+
* handler for the split logic.
|
|
208
|
+
*/
|
|
209
|
+
private specialTagDiffStack: Array<{
|
|
210
|
+
tag: string
|
|
211
|
+
styledTagNames: string
|
|
212
|
+
cssClass: string
|
|
213
|
+
metadata: WrapMetadata | undefined
|
|
214
|
+
}> = []
|
|
166
215
|
private newWords: string[] = []
|
|
167
216
|
private oldWords: string[] = []
|
|
168
217
|
/**
|
|
@@ -234,8 +283,31 @@ export default class HtmlDiff {
|
|
|
234
283
|
this.newText = newText
|
|
235
284
|
}
|
|
236
285
|
|
|
237
|
-
|
|
238
|
-
|
|
286
|
+
/**
|
|
287
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
288
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
289
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
290
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
291
|
+
*
|
|
292
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
293
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
294
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
295
|
+
*/
|
|
296
|
+
static execute(oldText: string, newText: string, options: AnalyzeOptions = {}): string {
|
|
297
|
+
const inner = new HtmlDiff(oldText, newText)
|
|
298
|
+
if (options.blockExpressions) {
|
|
299
|
+
for (const expr of options.blockExpressions) inner.addBlockExpression(expr)
|
|
300
|
+
}
|
|
301
|
+
if (options.repeatingWordsAccuracy !== undefined) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy
|
|
302
|
+
if (options.orphanMatchThreshold !== undefined) inner.orphanMatchThreshold = options.orphanMatchThreshold
|
|
303
|
+
if (options.ignoreWhitespaceDifferences !== undefined) {
|
|
304
|
+
inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences
|
|
305
|
+
}
|
|
306
|
+
// `useProjections` is intentionally NOT plumbed here — the 2-way
|
|
307
|
+
// path's build() runs its own heuristic. `analyze` honours it; if
|
|
308
|
+
// you need to force it for a 2-way result, route through `analyze`
|
|
309
|
+
// and consume the operations directly.
|
|
310
|
+
return inner.build()
|
|
239
311
|
}
|
|
240
312
|
|
|
241
313
|
/**
|
|
@@ -321,56 +393,60 @@ export default class HtmlDiff {
|
|
|
321
393
|
}
|
|
322
394
|
|
|
323
395
|
/**
|
|
324
|
-
* Three-way HTML diff
|
|
325
|
-
*
|
|
326
|
-
*
|
|
327
|
-
*
|
|
328
|
-
*
|
|
329
|
-
*
|
|
330
|
-
*
|
|
396
|
+
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
397
|
+
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
398
|
+
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
399
|
+
* blackline UX where the negotiation has gone through multiple turns
|
|
400
|
+
* and the reader wants to see "who proposed what" across the whole
|
|
401
|
+
* history, not just the most recent round.
|
|
402
|
+
*
|
|
403
|
+
* When both parties happen to have made the same change (e.g. CP
|
|
404
|
+
* proposed a wording change in turn N, Me adopted it in turn N+1),
|
|
405
|
+
* the change reads as "settled" and is emitted unmarked — only
|
|
406
|
+
* disagreements and pending proposals carry author attribution.
|
|
331
407
|
*
|
|
332
|
-
*
|
|
333
|
-
*
|
|
334
|
-
*
|
|
335
|
-
*
|
|
336
|
-
*
|
|
337
|
-
* override.
|
|
408
|
+
* @param genesis the shared common ancestor (per-user — the FE
|
|
409
|
+
* picks between V1.0 and /preview/initialAnswers
|
|
410
|
+
* based on `prefillReceiverAnswers`)
|
|
411
|
+
* @param cpLatest the counterparty's current published version
|
|
412
|
+
* @param meCurrent Me's current draft (the document on screen)
|
|
338
413
|
*/
|
|
339
|
-
static executeThreeWay(
|
|
340
|
-
return HtmlDiff.executeThreeWayWithDepth(
|
|
414
|
+
static executeThreeWay(genesis: string, cpLatest: string, meCurrent: string, options: ThreeWayOptions = {}): string {
|
|
415
|
+
return HtmlDiff.executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, 0)
|
|
341
416
|
}
|
|
342
417
|
|
|
343
418
|
private static executeThreeWayWithDepth(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
419
|
+
genesis: string,
|
|
420
|
+
cpLatest: string,
|
|
421
|
+
meCurrent: string,
|
|
347
422
|
options: ThreeWayOptions,
|
|
348
423
|
depth: number
|
|
349
424
|
): string {
|
|
350
|
-
// Table preprocessing first — replaces each
|
|
425
|
+
// Table preprocessing first — replaces each genesis/cp/me table with a
|
|
351
426
|
// shared-nonce placeholder, then the word-level merge runs over the
|
|
352
427
|
// table-free inputs. Cells are diffed recursively via executeThreeWay
|
|
353
|
-
// so the cell content is itself three-way attributed.
|
|
354
|
-
// happens at the end.
|
|
428
|
+
// so the cell content is itself three-way attributed.
|
|
355
429
|
//
|
|
356
|
-
// Depth-cap the recursion
|
|
357
|
-
//
|
|
358
|
-
// Beyond the cap, skip table preprocessing entirely and let the
|
|
359
|
-
// word-level merge handle the raw HTML — same bail-out semantics as
|
|
360
|
-
// the 2-way `MaxTablePreprocessDepth` cap.
|
|
430
|
+
// Depth-cap the recursion so adversarially-nested input can't blow
|
|
431
|
+
// stack/memory.
|
|
361
432
|
const tablePreprocess =
|
|
362
433
|
depth < HtmlDiff.MaxThreeWayDepth
|
|
363
|
-
? preprocessTablesThreeWay(
|
|
364
|
-
HtmlDiff.executeThreeWayWithDepth(
|
|
434
|
+
? preprocessTablesThreeWay(genesis, cpLatest, meCurrent, (g, c, m) =>
|
|
435
|
+
HtmlDiff.executeThreeWayWithDepth(g, c, m, options, depth + 1)
|
|
365
436
|
)
|
|
366
437
|
: null
|
|
367
|
-
const
|
|
368
|
-
const
|
|
369
|
-
const
|
|
370
|
-
|
|
438
|
+
const inGenesis = tablePreprocess?.modifiedGenesis ?? genesis
|
|
439
|
+
const inCp = tablePreprocess?.modifiedCp ?? cpLatest
|
|
440
|
+
const inMe = tablePreprocess?.modifiedMe ?? meCurrent
|
|
441
|
+
|
|
442
|
+
// Symmetric projection across both analyses. The genesis-spine
|
|
443
|
+
// algorithm requires `genesis` to tokenise identically on each
|
|
444
|
+
// pair-wise analysis (both have genesis as the OLD side), so the
|
|
445
|
+
// useProjections decision must agree across both calls.
|
|
371
446
|
const useProjections =
|
|
372
447
|
options.useProjections ??
|
|
373
|
-
(HtmlDiff.evaluateProjectionApplicability(
|
|
448
|
+
(HtmlDiff.evaluateProjectionApplicability(inGenesis, inCp) &&
|
|
449
|
+
HtmlDiff.evaluateProjectionApplicability(inGenesis, inMe))
|
|
374
450
|
|
|
375
451
|
const analyzeOpts: AnalyzeOptions = {
|
|
376
452
|
useProjections,
|
|
@@ -379,21 +455,21 @@ export default class HtmlDiff {
|
|
|
379
455
|
orphanMatchThreshold: options.orphanMatchThreshold,
|
|
380
456
|
ignoreWhitespaceDifferences: options.ignoreWhitespaceDifferences,
|
|
381
457
|
}
|
|
382
|
-
const
|
|
383
|
-
const
|
|
458
|
+
const dCp = HtmlDiff.analyze(inGenesis, inCp, analyzeOpts)
|
|
459
|
+
const dMe = HtmlDiff.analyze(inGenesis, inMe, analyzeOpts)
|
|
384
460
|
|
|
385
|
-
// Spine sanity check
|
|
386
|
-
//
|
|
387
|
-
// loudly rather than silently
|
|
388
|
-
if (
|
|
461
|
+
// Spine sanity check — both analyses must share an identical genesis
|
|
462
|
+
// tokenisation. Symmetric useProjections guarantees this; if it ever
|
|
463
|
+
// diverges, fail loudly rather than silently misattribute.
|
|
464
|
+
if (dCp.oldDiffWords.length !== dMe.oldDiffWords.length) {
|
|
389
465
|
throw new Error(
|
|
390
|
-
'HtmlDiff.executeThreeWay:
|
|
391
|
-
`(${
|
|
466
|
+
'HtmlDiff.executeThreeWay: genesis tokenisation diverged across pair-wise analyses ' +
|
|
467
|
+
`(${dCp.oldDiffWords.length} vs ${dMe.oldDiffWords.length}). ` +
|
|
392
468
|
'This indicates the symmetric-projection coordination has a bug.'
|
|
393
469
|
)
|
|
394
470
|
}
|
|
395
471
|
|
|
396
|
-
const segments = buildSegments(
|
|
472
|
+
const segments = buildSegments(dCp, dMe)
|
|
397
473
|
const merged = HtmlDiff.emitSegments(segments)
|
|
398
474
|
return tablePreprocess ? restoreTablePlaceholders(merged, tablePreprocess.placeholderToDiff) : merged
|
|
399
475
|
}
|
|
@@ -404,6 +480,25 @@ export default class HtmlDiff {
|
|
|
404
480
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
405
481
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
406
482
|
* opened in one segment and closed in another stays balanced.
|
|
483
|
+
*
|
|
484
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
485
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
486
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
487
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
488
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
489
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
490
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
491
|
+
* at the end of emission.
|
|
492
|
+
*
|
|
493
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
494
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
495
|
+
* postInject regardless of whether the outer segment is ins or
|
|
496
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
497
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
498
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
499
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
500
|
+
* output is usually acceptable — but the warning IS the signal
|
|
501
|
+
* that the input had a real imbalance worth investigating.
|
|
407
502
|
*/
|
|
408
503
|
private static emitSegments(segments: Segment[]): string {
|
|
409
504
|
const emitter = new HtmlDiff('', '')
|
|
@@ -416,18 +511,21 @@ export default class HtmlDiff {
|
|
|
416
511
|
// insertTag mutates its `words` array; pass a copy.
|
|
417
512
|
emitter.insertTag(tag, baseClass, [...seg.words], metadata)
|
|
418
513
|
}
|
|
419
|
-
// Stack-balance invariant: every special-case opening tag pushed onto
|
|
420
|
-
// `specialTagDiffStack` during emission must have been matched by a
|
|
421
|
-
// closing tag. An unbalanced stack means the input had unbalanced
|
|
422
|
-
// formatting tags AND a Replace at an inconvenient position — the
|
|
423
|
-
// output would be silently malformed (half-closed `<ins>`). Fail
|
|
424
|
-
// loudly so the caller can investigate rather than ship broken HTML.
|
|
425
514
|
if (emitter.specialTagDiffStack.length > 0) {
|
|
426
|
-
|
|
515
|
+
// Log once so we can spot bad inputs in dev tools, but don't
|
|
516
|
+
// throw — the caller's only fallback was to crash the React
|
|
517
|
+
// tree, which is worse than emitting slightly-imperfect HTML.
|
|
518
|
+
// eslint-disable-next-line no-console
|
|
519
|
+
console.warn(
|
|
427
520
|
`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} ` +
|
|
428
|
-
'unclosed formatting
|
|
429
|
-
'
|
|
521
|
+
'unclosed formatting wrap(s) on the stack. Closing defensively. ' +
|
|
522
|
+
'This usually means a formatting tag opens in a del/ins segment ' +
|
|
523
|
+
'and its matching closer is in an equal segment.'
|
|
430
524
|
)
|
|
525
|
+
while (emitter.specialTagDiffStack.length > 0) {
|
|
526
|
+
emitter.content.push('</ins>')
|
|
527
|
+
emitter.specialTagDiffStack.pop()
|
|
528
|
+
}
|
|
431
529
|
}
|
|
432
530
|
return emitter.content.join('')
|
|
433
531
|
}
|
|
@@ -807,8 +905,13 @@ export default class HtmlDiff {
|
|
|
807
905
|
// if there are nonTags, the index of the last tag is the index before the first nonTag.
|
|
808
906
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1
|
|
809
907
|
|
|
810
|
-
|
|
811
|
-
|
|
908
|
+
// Pre-injection sits BEFORE the extracted tag-block content (used
|
|
909
|
+
// by closing tags so `</ins></strong>` reads left-to-right).
|
|
910
|
+
// Post-injection sits AFTER (used by opening tags so the rendered
|
|
911
|
+
// order is `<strong><ins ...>` and by the overlap-split case so
|
|
912
|
+
// the re-opened `<ins>`s sit AFTER the actual closing tag).
|
|
913
|
+
let preInject = ''
|
|
914
|
+
let postInject = ''
|
|
812
915
|
|
|
813
916
|
// handle opening tag
|
|
814
917
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
@@ -820,10 +923,11 @@ export default class HtmlDiff {
|
|
|
820
923
|
}
|
|
821
924
|
const styledTagNames = Array.from(tagNames).join(' ')
|
|
822
925
|
|
|
823
|
-
this.specialTagDiffStack.push(words[0])
|
|
824
926
|
// Carry the caller's metadata into the formatting-tag wrapper so
|
|
825
927
|
// a 3-way author tag survives a `<strong>`/`<em>` content edit.
|
|
826
|
-
|
|
928
|
+
const styledCssClass = `mod ${styledTagNames}`
|
|
929
|
+
this.specialTagDiffStack.push({ tag: words[0], styledTagNames, cssClass: styledCssClass, metadata })
|
|
930
|
+
postInject = `<ins${Utils.composeTagAttributes(styledCssClass, metadata ?? {})}>`
|
|
827
931
|
if (tag === HtmlDiff.DelTag) {
|
|
828
932
|
words.shift()
|
|
829
933
|
|
|
@@ -835,7 +939,6 @@ export default class HtmlDiff {
|
|
|
835
939
|
}
|
|
836
940
|
// handle closing tag
|
|
837
941
|
else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
838
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop()
|
|
839
942
|
// For delete operations: when the tag block contains a mix of formatting and
|
|
840
943
|
// non-formatting closing tags (e.g. </strong></div>), compare against the first
|
|
841
944
|
// closing tag (the formatting one) rather than the last tag in the block.
|
|
@@ -850,19 +953,39 @@ export default class HtmlDiff {
|
|
|
850
953
|
tagIndexToCompare = 0
|
|
851
954
|
}
|
|
852
955
|
}
|
|
853
|
-
const openingAndClosingTagsMatch =
|
|
854
|
-
!!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])
|
|
855
956
|
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
957
|
+
// Search the stack for a matching opener (LIFO). When the match
|
|
958
|
+
// is the top entry, this is the normal balanced case and we
|
|
959
|
+
// emit a single `</ins>` before the closing tag. When the match
|
|
960
|
+
// is below an unmatched opener — i.e. another formatting wrap
|
|
961
|
+
// opened after it but hasn't been closed yet — the wraps
|
|
962
|
+
// overlap in source order, which has no valid LIFO HTML
|
|
963
|
+
// expression. Resolve by SPLITTING the wraps: close everything
|
|
964
|
+
// above the match (their `<ins>`s and the match's `<ins>`), then
|
|
965
|
+
// re-open the above wraps with fresh `<ins>` tags AFTER the
|
|
966
|
+
// closing tag emits. The above wraps continue to apply until
|
|
967
|
+
// their own closing tag arrives.
|
|
968
|
+
const closingTagName = Utils.getTagName(words[tagIndexToCompare])
|
|
969
|
+
let matchIdx = -1
|
|
970
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) {
|
|
971
|
+
if (Utils.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
972
|
+
matchIdx = i
|
|
973
|
+
break
|
|
974
|
+
}
|
|
859
975
|
}
|
|
860
976
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
977
|
+
if (matchIdx >= 0) {
|
|
978
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1)
|
|
979
|
+
this.specialTagDiffStack.pop() // pop the matched entry
|
|
980
|
+
// One `</ins>` per above entry, then one for the match itself.
|
|
981
|
+
preInject = '</ins>'.repeat(aboveEntries.length + 1)
|
|
982
|
+
for (const entry of aboveEntries) {
|
|
983
|
+
postInject += `<ins${Utils.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`
|
|
984
|
+
this.specialTagDiffStack.push(entry) // their wrap continues via the new <ins>
|
|
985
|
+
}
|
|
865
986
|
}
|
|
987
|
+
// No match in stack — orphan closing tag, drop the `<ins>` work
|
|
988
|
+
// and just let the tag itself flow through extractConsecutiveWords.
|
|
866
989
|
|
|
867
990
|
if (tag === HtmlDiff.DelTag) {
|
|
868
991
|
words.shift()
|
|
@@ -873,7 +996,7 @@ export default class HtmlDiff {
|
|
|
873
996
|
}
|
|
874
997
|
}
|
|
875
998
|
|
|
876
|
-
if (words.length === 0 &&
|
|
999
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) {
|
|
877
1000
|
break
|
|
878
1001
|
}
|
|
879
1002
|
|
|
@@ -889,11 +1012,7 @@ export default class HtmlDiff {
|
|
|
889
1012
|
!HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase())
|
|
890
1013
|
: Utils.isTag
|
|
891
1014
|
|
|
892
|
-
|
|
893
|
-
this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(''))
|
|
894
|
-
} else {
|
|
895
|
-
this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join('') + specialCaseTagInjection)
|
|
896
|
-
}
|
|
1015
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join('') + postInject)
|
|
897
1016
|
|
|
898
1017
|
if (words.length === 0) continue
|
|
899
1018
|
|
|
@@ -1000,6 +1119,31 @@ export default class HtmlDiff {
|
|
|
1000
1119
|
continue
|
|
1001
1120
|
}
|
|
1002
1121
|
|
|
1122
|
+
// Never orphan-reject a match whose tokens are ALL HTML tags.
|
|
1123
|
+
// Tag tokens are structural; rejecting `</strong>` / `</em>` as
|
|
1124
|
+
// an orphan match between two content deletions merges the tag
|
|
1125
|
+
// into the deletion, leaving the matching opener unclosed —
|
|
1126
|
+
// browsers then auto-close the opener at the END of the
|
|
1127
|
+
// deletion, producing visually-wrong output (e.g. the body of
|
|
1128
|
+
// a section deletion rendered as bold-italic because the
|
|
1129
|
+
// closing `</strong></em>` ended up after the body deletion
|
|
1130
|
+
// rather than after the heading). The orphan threshold is
|
|
1131
|
+
// designed for stray word matches between heavily-edited spans,
|
|
1132
|
+
// not for formatting boundaries.
|
|
1133
|
+
let allTags = true
|
|
1134
|
+
for (let i = curr.startInNew; i < curr.endInNew; i++) {
|
|
1135
|
+
if (!Utils.isTag(wordsForDiffNew[i])) {
|
|
1136
|
+
allTags = false
|
|
1137
|
+
break
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
if (allTags) {
|
|
1141
|
+
yield curr
|
|
1142
|
+
prev = curr
|
|
1143
|
+
curr = next
|
|
1144
|
+
continue
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1003
1147
|
let oldDistanceInChars = 0
|
|
1004
1148
|
for (let i = prev.endInOld; i < next.startInOld; i++) {
|
|
1005
1149
|
oldDistanceInChars += wordsForDiffOld[i].length
|