@createiq/htmldiff 1.0.4 → 1.0.5-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(diff -u test/input1.html test/input2.html)",
5
+ "Bash(npm test -- test/Bug.spec.tsx)",
6
+ "Bash(timeout 30s npm run test:ci -- test/Bug.spec.tsx)",
7
+ "Bash(npm run build)",
8
+ "Bash(timeout 10s npm run test:ci -- test/Bug.spec.tsx)",
9
+ "Bash(npm run lint)",
10
+ "Bash(npm run test:ci)",
11
+ "Bash(npm run bench:ci)"
12
+ ],
13
+ "deny": []
14
+ }
15
+ }
package/dist/HtmlDiff.cjs CHANGED
@@ -462,8 +462,17 @@ var HtmlDiff = class HtmlDiff {
462
462
  /** Maps content-word index → original word index */
463
463
  oldContentToOriginal = null;
464
464
  newContentToOriginal = null;
465
- /** Tracks the last original old word index output, so equal operations can include leading structural tags */
465
+ /**
466
+ * Tracks the next unwritten word index in oldWords/newWords. Mutated only by
467
+ * {@link sliceOriginalWordsForOp} (each op reads a slice and advances its cursor).
468
+ * Advances monotonically. Used so:
469
+ * - subsequent equal/delete ops know where in old to resume from
470
+ * - subsequent insert ops know where in new to resume from
471
+ * The two cursors are independent: equal/delete output from old and advance the old
472
+ * cursor; insert outputs from new and advances the new cursor.
473
+ */
466
474
  lastOriginalOldOutputIndex = 0;
475
+ lastOriginalNewOutputIndex = 0;
467
476
  matchGranularity = 0;
468
477
  blockExpressions = [];
469
478
  /**
@@ -540,21 +549,32 @@ var HtmlDiff = class HtmlDiff {
540
549
  this.newText = "";
541
550
  }
542
551
  /**
543
- * Checks whether the two word arrays have structural HTML differences (different non-formatting tags
544
- * or different whitespace between structural tags). When they do, builds "content projections" that
545
- * strip structural noise so the diff algorithm only sees meaningful content and formatting changes.
552
+ * Builds "content projections" word arrays with structural wrapper tags stripped when
553
+ * structural normalization is appropriate for these inputs. The diff algorithm operates on
554
+ * the projections so wrapper-tag differences (e.g. `<p>` vs `<div>`) don't appear as content
555
+ * changes; structural tags are then folded back in at output time.
546
556
  */
547
557
  buildContentProjections() {
548
558
  if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) return;
549
559
  const oldProjection = HtmlDiff.createContentProjection(this.oldWords);
550
560
  const newProjection = HtmlDiff.createContentProjection(this.newWords);
551
- if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) return;
561
+ if (!HtmlDiff.shouldUseContentProjections(this.oldWords, this.newWords, oldProjection, newProjection)) return;
552
562
  this.oldContentWords = oldProjection.contentWords;
553
563
  this.oldContentToOriginal = oldProjection.contentToOriginal;
554
564
  this.newContentWords = newProjection.contentWords;
555
565
  this.newContentToOriginal = newProjection.contentToOriginal;
556
566
  }
557
567
  /**
568
+ * Decides whether structural normalization should be activated for this pair of inputs.
569
+ * Each clause is a distinct correctness or fitness check — extend by adding a named
570
+ * sub-predicate rather than chaining ad-hoc conditions.
571
+ */
572
+ static shouldUseContentProjections(oldWords, newWords, oldProjection, newProjection) {
573
+ if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) return false;
574
+ if (oldProjection.contentWords.length < oldWords.length !== newProjection.contentWords.length < newWords.length) return false;
575
+ return true;
576
+ }
577
+ /**
558
578
  * Tags that commonly serve as content wrappers and may change structurally
559
579
  * without affecting the actual content. Only these tags are stripped during
560
580
  * structural normalization.
@@ -575,6 +595,10 @@ var HtmlDiff = class HtmlDiff {
575
595
  const tagName = Utils_default.getTagName(word);
576
596
  return HtmlDiff.WrapperTags.has(tagName);
577
597
  }
598
+ /** True when the word is a structural opening tag (e.g. `<p>`, `<div>`). */
599
+ static isOpeningStructuralTag(word) {
600
+ return HtmlDiff.isStructuralTag(word) && !word.startsWith("</");
601
+ }
578
602
  /**
579
603
  * Returns true if words between structural tags are just whitespace (indentation).
580
604
  */
@@ -629,58 +653,61 @@ var HtmlDiff = class HtmlDiff {
629
653
  this.processInsertOperation(operation, "diffmod");
630
654
  }
631
655
  processInsertOperation(operation, cssClass) {
632
- const words = this.oldContentWords ? this.getOriginalNewWords(operation.startInNew, operation.endInNew) : this.newWords.slice(operation.startInNew, operation.endInNew);
656
+ const words = this.usingContentProjections() ? this.sliceOriginalWordsForOp("new", operation.startInNew, operation.endInNew) : this.newWords.slice(operation.startInNew, operation.endInNew);
633
657
  this.insertTag(HtmlDiff.InsTag, cssClass, words);
634
658
  }
635
659
  processDeleteOperation(operation, cssClass) {
636
- const words = this.oldContentWords ? this.getOriginalOldWords(operation.startInOld, operation.endInOld) : this.oldWords.slice(operation.startInOld, operation.endInOld);
660
+ const words = this.usingContentProjections() ? this.sliceOriginalWordsForOp("old", operation.startInOld, operation.endInOld) : this.oldWords.slice(operation.startInOld, operation.endInOld);
637
661
  this.insertTag(HtmlDiff.DelTag, cssClass, words);
638
- if (this.oldContentToOriginal && operation.endInOld > 0) {
639
- const lastDeletedOrigIdx = this.oldContentToOriginal[operation.endInOld - 1];
640
- this.lastOriginalOldOutputIndex = Math.max(this.lastOriginalOldOutputIndex, lastDeletedOrigIdx + 1);
641
- }
642
662
  }
643
663
  processEqualOperation(operation) {
644
- if (this.oldContentWords) {
645
- const result = this.getOriginalOldWordsWithStructure(operation.startInOld, operation.endInOld);
664
+ if (this.usingContentProjections()) {
665
+ const result = this.sliceOriginalWordsForOp("old", operation.startInOld, operation.endInOld);
646
666
  this.content.push(result.join(""));
667
+ this.sliceOriginalWordsForOp("new", operation.startInNew, operation.endInNew);
647
668
  } else {
648
669
  const result = this.newWords.slice(operation.startInNew, operation.endInNew);
649
670
  this.content.push(result.join(""));
650
671
  }
651
672
  }
652
- /**
653
- * Gets original old words for a content-index range, including only content and formatting tags
654
- * (used for delete/replace operations where we don't want structural tags).
655
- */
656
- getOriginalOldWords(contentStart, contentEnd) {
657
- if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd);
658
- const result = [];
659
- for (let i = contentStart; i < contentEnd; i++) result.push(this.oldWords[this.oldContentToOriginal[i]]);
660
- return result;
661
- }
662
- /**
663
- * Gets original new words for a content-index range, including only content and formatting tags
664
- * (used for insert/replace operations where we don't want structural tags).
665
- */
666
- getOriginalNewWords(contentStart, contentEnd) {
667
- if (!this.newContentToOriginal) return this.newWords.slice(contentStart, contentEnd);
668
- const result = [];
669
- for (let i = contentStart; i < contentEnd; i++) result.push(this.newWords[this.newContentToOriginal[i]]);
670
- return result;
673
+ /** True when content projections are active for both sides — i.e. structural normalization is in effect. */
674
+ usingContentProjections() {
675
+ return this.oldContentToOriginal !== null && this.newContentToOriginal !== null;
671
676
  }
672
677
  /**
673
- * Gets original old words for a content-index range, INCLUDING structural tags and whitespace
674
- * between the content words (used for equal operations to preserve old HTML structure).
678
+ * Returns the slice of original (old or new) words covering a content-index range,
679
+ * including the structural tags that surround the content. Advances the side's cursor
680
+ * past the slice so the next op resumes correctly.
681
+ *
682
+ * The slice extends:
683
+ * - LEADING: from the side's cursor (or the first content word's original index,
684
+ * whichever is smaller) so structural tags that precede the first content word
685
+ * are picked up by this op rather than left orphaned.
686
+ * - TRAILING (non-last range): from just after the last content word, including
687
+ * closing structural tags that close *this* op's paragraphs, but stopping at
688
+ * the first opening structural tag — that opening tag belongs to the next
689
+ * op's paragraph and would otherwise be emitted twice.
690
+ * - TRAILING (last range): all the way to the end of words, since there is no next
691
+ * op to claim the trailing tags.
675
692
  */
676
- getOriginalOldWordsWithStructure(contentStart, contentEnd) {
677
- if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd);
693
+ sliceOriginalWordsForOp(side, contentStart, contentEnd) {
694
+ const words = side === "old" ? this.oldWords : this.newWords;
695
+ const contentToOriginal = side === "old" ? this.oldContentToOriginal : this.newContentToOriginal;
696
+ if (!contentToOriginal) return words.slice(contentStart, contentEnd);
678
697
  if (contentStart >= contentEnd) return [];
679
- const firstContentOrigIdx = this.oldContentToOriginal[contentStart];
680
- const origStart = Math.min(this.lastOriginalOldOutputIndex, firstContentOrigIdx);
681
- const origEnd = contentEnd < this.oldContentToOriginal.length ? this.oldContentToOriginal[contentEnd] : this.oldWords.length;
682
- this.lastOriginalOldOutputIndex = origEnd;
683
- return this.oldWords.slice(origStart, origEnd);
698
+ const firstContentOrigIdx = contentToOriginal[contentStart];
699
+ const lastContentOrigIdx = contentToOriginal[contentEnd - 1];
700
+ const cursor = side === "old" ? this.lastOriginalOldOutputIndex : this.lastOriginalNewOutputIndex;
701
+ const origStart = Math.min(cursor, firstContentOrigIdx);
702
+ let origEnd;
703
+ if (contentEnd < contentToOriginal.length) {
704
+ const limit = contentToOriginal[contentEnd];
705
+ origEnd = lastContentOrigIdx + 1;
706
+ while (origEnd < limit && !HtmlDiff.isOpeningStructuralTag(words[origEnd])) origEnd++;
707
+ } else origEnd = words.length;
708
+ if (side === "old") this.lastOriginalOldOutputIndex = origEnd;
709
+ else this.lastOriginalNewOutputIndex = origEnd;
710
+ return words.slice(origStart, origEnd);
684
711
  }
685
712
  /**
686
713
  * This method encloses words within a specified tag (ins or del), and adds this into "content",
@@ -1 +1 @@
1
- {"version":3,"file":"HtmlDiff.cjs","names":["Utils","Utils","Utils"],"sources":["../src/Match.ts","../src/Utils.ts","../src/MatchFinder.ts","../src/Operation.ts","../src/WordSplitter.ts","../src/HtmlDiff.ts"],"sourcesContent":["export default class Match {\n private _startInOld: number\n private _startInNew: number\n private _size: number\n\n constructor(startInOld: number, startInNew: number, size: number) {\n this._startInOld = startInOld\n this._startInNew = startInNew\n this._size = size\n }\n\n get startInOld() {\n return this._startInOld\n }\n\n get startInNew() {\n return this._startInNew\n }\n\n get size() {\n return this._size\n }\n\n get endInOld() {\n return this._startInOld + this._size\n }\n\n get endInNew() {\n return this._startInNew + this._size\n }\n}\n","const openingTagRegex = /^\\s*<[^>]+>\\s*$/\nconst closingTagTexRegex = /^\\s*<\\/[^>]+>\\s*$/\nconst tagWordRegex = /<[^\\s>]+/\nconst whitespaceRegex = /^(\\s|&nbsp;)+$/\nconst wordRegex = /[\\w#@]+/\nconst tagRegex = /<\\/?(?<name>[^\\s/>]+)[^>]*>/\n\nconst SpecialCaseWordTags: readonly string[] = ['<img']\n\nexport function isTag(item: string): boolean {\n if (SpecialCaseWordTags.some(re => item?.startsWith(re))) {\n return false\n }\n\n return isOpeningTag(item) || isClosingTag(item)\n}\n\nfunction isOpeningTag(item: string): boolean {\n return openingTagRegex.test(item)\n}\n\nfunction isClosingTag(item: string): boolean {\n return closingTagTexRegex.test(item)\n}\n\nexport function stripTagAttributes(word: string): string {\n const match = tagWordRegex.exec(word)\n if (match) {\n return `${match[0]}${word.endsWith('/>') ? 
'/>' : '>'}`\n }\n\n return word\n}\n\nexport function wrapText(text: string, tagName: string, cssClass: string): string {\n return `<${tagName} class='${cssClass}'>${text}</${tagName}>`\n}\n\nexport function isStartOfTag(val: string): boolean {\n return val === '<'\n}\n\nexport function isEndOfTag(val: string): boolean {\n return val === '>'\n}\n\nexport function isStartOfEntity(val: string): boolean {\n return val === '&'\n}\n\nexport function isEndOfEntity(val: string): boolean {\n return val === ';'\n}\n\nexport function isWhiteSpace(value: string): boolean {\n return whitespaceRegex.test(value)\n}\n\nexport function stripAnyAttributes(word: string): string {\n if (isTag(word)) {\n return stripTagAttributes(word)\n }\n\n return word\n}\n\nexport function isWord(text: string): boolean {\n return wordRegex.test(text)\n}\n\nexport function getTagName(word: string | null): string {\n if (word === null) {\n return ''\n }\n\n const match = tagRegex.exec(word)\n if (match) {\n return match.groups?.name.toLowerCase() ?? match[1].toLowerCase()\n }\n\n return ''\n}\n\nexport default {\n isTag,\n stripTagAttributes,\n wrapText,\n isStartOfTag,\n isEndOfTag,\n isStartOfEntity,\n isEndOfEntity,\n isWhiteSpace,\n stripAnyAttributes,\n isWord,\n getTagName,\n}\n","import Match from './Match'\nimport type MatchOptions from './MatchOptions'\nimport Utils from './Utils'\n\n/**\n * Finds the longest match in given texts. 
It uses indexing with fixed granularity that is used to compare blocks of text.\n */\nexport default class MatchFinder {\n private oldWords: string[]\n private newWords: string[]\n private startInOld: number\n private endInOld: number\n private startInNew: number\n private endInNew: number\n private wordIndices: { [word: string]: number[] } = {}\n private options: MatchOptions\n\n constructor(\n oldWords: string[],\n newWords: string[],\n startInOld: number,\n endInOld: number,\n startInNew: number,\n endInNew: number,\n options: MatchOptions\n ) {\n this.oldWords = oldWords\n this.newWords = newWords\n this.startInOld = startInOld\n this.endInOld = endInOld\n this.startInNew = startInNew\n this.endInNew = endInNew\n this.options = options\n }\n\n private indexNewWords() {\n this.wordIndices = {}\n const block: string[] = []\n for (let i = this.startInNew; i < this.endInNew; i++) {\n // if word is a tag, we should ignore attributes as attribute changes are not supported (yet)\n const word = this.normalizeForIndex(this.newWords[i])\n const key = MatchFinder.putNewWord(block, word, this.options.blockSize)\n\n if (key === null) {\n continue\n }\n\n if (!this.wordIndices[key]) {\n this.wordIndices[key] = []\n }\n this.wordIndices[key].push(i)\n }\n }\n\n private static putNewWord(block: string[], word: string, blockSize: number): string | null {\n block.push(word)\n\n if (block.length > blockSize) {\n block.shift()\n }\n\n if (block.length !== blockSize) {\n return null\n }\n\n return block.join('')\n }\n\n private normalizeForIndex(word: string): string {\n const output = Utils.stripAnyAttributes(word)\n if (this.options.ignoreWhitespaceDifferences && Utils.isWhiteSpace(output)) {\n return ' '\n }\n\n return output\n }\n\n findMatch(): Match | null {\n this.indexNewWords()\n this.removeRepeatingWords()\n\n let hasIndices = false\n for (const _key in this.wordIndices) {\n hasIndices = true\n break\n }\n if (!hasIndices) {\n return null\n }\n\n let bestMatchInOld = 
this.startInOld\n let bestMatchInNew = this.startInNew\n let bestMatchSize = 0\n\n let matchLengthAt: Map<number, number> = new Map()\n const block: string[] = []\n\n for (let indexInOld = this.startInOld; indexInOld < this.endInOld; indexInOld++) {\n const word = this.normalizeForIndex(this.oldWords[indexInOld])\n const index = MatchFinder.putNewWord(block, word, this.options.blockSize)\n\n if (index === null) {\n continue\n }\n\n const newMatchLengthAt: Map<number, number> = new Map()\n\n if (!this.wordIndices[index]) {\n matchLengthAt = newMatchLengthAt\n continue\n }\n\n for (const indexInNew of this.wordIndices[index]) {\n // biome-ignore lint/style/noNonNullAssertion: This is safe as guarded by has()\n const newMatchLength = (matchLengthAt.has(indexInNew - 1) ? matchLengthAt.get(indexInNew - 1)! : 0) + 1\n newMatchLengthAt.set(indexInNew, newMatchLength)\n\n if (newMatchLength > bestMatchSize) {\n bestMatchInOld = indexInOld - newMatchLength - this.options.blockSize + 2\n bestMatchInNew = indexInNew - newMatchLength - this.options.blockSize + 2\n bestMatchSize = newMatchLength\n }\n }\n\n matchLengthAt = newMatchLengthAt\n }\n\n return bestMatchSize !== 0\n ? new Match(bestMatchInOld, bestMatchInNew, bestMatchSize + this.options.blockSize - 1)\n : null\n }\n\n /**\n * This method removes words that occur too many times. This way it reduces total count of comparison operations\n * and as result the diff algorithm takes less time. 
But the side effect is that it may detect false differences of\n * the repeating words.\n * @private\n */\n private removeRepeatingWords() {\n const threshold = this.newWords.length * this.options.repeatingWordsAccuracy\n const repeatingWords = Object.entries(this.wordIndices)\n .filter(([, indices]) => indices.length > threshold)\n .map(([word]) => word)\n\n for (const w of repeatingWords) {\n delete this.wordIndices[w]\n }\n }\n}\n","import type Action from './Action'\n\nexport default class Operation {\n action: Action\n startInOld: number\n endInOld: number\n startInNew: number\n endInNew: number\n\n constructor(action: Action, startInOld: number, endInOld: number, startInNew: number, endInNew: number) {\n this.action = action\n this.startInOld = startInOld\n this.endInOld = endInOld\n this.startInNew = startInNew\n this.endInNew = endInNew\n }\n}\n","import Mode from './Mode'\nimport Utils from './Utils'\n\nexport default class WordSplitter {\n private text: string\n private isBlockCheckRequired: boolean\n private blockLocations: BlockFinderResult\n private mode: Mode\n private isGrouping = false\n private globbingUntil: number\n private currentWord: string[]\n private words: string[]\n private static NotGlobbing = -1\n\n private get currentWordHasChars() {\n return this.currentWord.length > 0\n }\n\n constructor(text: string, blockExpressions: RegExp[]) {\n this.text = text\n this.blockLocations = new BlockFinder(text, blockExpressions).findBlocks()\n this.isBlockCheckRequired = this.blockLocations.hasBlocks\n this.mode = Mode.Character\n this.globbingUntil = WordSplitter.NotGlobbing\n this.currentWord = []\n this.words = []\n }\n\n process(): string[] {\n for (let index = 0; index < this.text.length; index++) {\n const character = this.text.charAt(index)\n this.processCharacter(index, character)\n }\n\n this.appendCurrentWordToWords()\n return this.words\n }\n\n private processCharacter(index: number, character: string) {\n if (this.isGlobbing(index, 
character)) {\n return\n }\n\n switch (this.mode) {\n case Mode.Character:\n this.processTextCharacter(character)\n break\n case Mode.Tag:\n this.processHtmlTagContinuation(character)\n break\n case Mode.Whitespace:\n this.processWhiteSpaceContinuation(character)\n break\n case Mode.Entity:\n this.processEntityContinuation(character)\n break\n }\n }\n\n private processEntityContinuation(character: string) {\n if (Utils.isStartOfTag(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Tag\n } else if (character.trim().length === 0) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Whitespace\n } else if (Utils.isEndOfEntity(character)) {\n let switchToNextMode = true\n if (this.currentWordHasChars) {\n this.currentWord.push(character)\n this.words.push(this.currentWord.join(''))\n\n //join &nbsp; entity with last whitespace\n if (\n this.words.length > 2 &&\n Utils.isWhiteSpace(this.words[this.words.length - 2]) &&\n Utils.isWhiteSpace(this.words[this.words.length - 1])\n ) {\n const w1 = this.words[this.words.length - 2]\n const w2 = this.words[this.words.length - 1]\n this.words.splice(this.words.length - 2, 2)\n this.currentWord = `${w1}${w2}`.split('')\n this.mode = Mode.Whitespace\n switchToNextMode = false\n }\n }\n\n if (switchToNextMode) {\n this.currentWord = []\n this.mode = Mode.Character\n }\n } else if (Utils.isWord(character)) {\n this.currentWord.push(character)\n } else {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Character\n }\n }\n\n private processWhiteSpaceContinuation(character: string) {\n if (Utils.isStartOfTag(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Tag\n } else if (Utils.isStartOfEntity(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Entity\n } else if (Utils.isWhiteSpace(character)) {\n 
this.currentWord.push(character)\n } else {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Character\n }\n }\n\n private processHtmlTagContinuation(character: string) {\n if (Utils.isEndOfTag(character)) {\n this.currentWord.push(character)\n this.appendCurrentWordToWords()\n this.mode = Utils.isWhiteSpace(character) ? Mode.Whitespace : Mode.Character\n } else {\n this.currentWord.push(character)\n }\n }\n\n private processTextCharacter(character: string) {\n if (Utils.isStartOfTag(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push('<')\n this.mode = Mode.Tag\n } else if (Utils.isStartOfEntity(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Entity\n } else if (Utils.isWhiteSpace(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Whitespace\n } else if (\n Utils.isWord(character) &&\n (this.currentWord.length === 0 || Utils.isWord(this.currentWord[this.currentWord.length - 1]))\n ) {\n this.currentWord.push(character)\n } else {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n }\n }\n\n private appendCurrentWordToWords() {\n if (this.currentWordHasChars) {\n this.words.push(this.currentWord.join(''))\n this.currentWord = []\n }\n }\n\n private isGlobbing(index: number, character: string): boolean {\n if (!this.isBlockCheckRequired) {\n return false\n }\n const isCurrentBlockTerminating = index === this.globbingUntil\n if (isCurrentBlockTerminating) {\n this.globbingUntil = WordSplitter.NotGlobbing\n this.isGrouping = false\n this.appendCurrentWordToWords()\n }\n\n const until = this.blockLocations.isInBlock(index)\n if (until) {\n this.isGrouping = true\n this.globbingUntil = until\n }\n if (this.isGrouping) {\n this.currentWord.push(character)\n this.mode = Mode.Character\n }\n return this.isGrouping\n }\n\n static convertHtmlToListOfWords(text: string, blockExpressions: 
RegExp[]): string[] {\n return new WordSplitter(text, blockExpressions).process()\n }\n}\n\nclass BlockFinderResult {\n private blocks: Map<number, number> = new Map()\n\n addBlock(from: number, to: number) {\n if (this.blocks.has(from)) {\n throw new ArgumentError('One or more block expressions result in a text sequence that overlaps.')\n }\n\n this.blocks.set(from, to)\n }\n\n isInBlock(location: number): number | null {\n return this.blocks.get(location) ?? null\n }\n\n get hasBlocks() {\n return this.blocks.size > 0\n }\n}\n\nclass ArgumentError extends Error {}\n\nclass BlockFinder {\n private text: string\n private blockExpressions: RegExp[]\n\n constructor(text: string, blockExpressions: RegExp[]) {\n this.text = text\n this.blockExpressions = blockExpressions\n }\n\n findBlocks(): BlockFinderResult {\n const result = new BlockFinderResult()\n for (const expression of this.blockExpressions) {\n this.processBlockMatcher(expression, result)\n }\n return result\n }\n\n private processBlockMatcher(exp: RegExp, result: BlockFinderResult) {\n let match: RegExpExecArray | null\n // biome-ignore lint/suspicious/noAssignInExpressions: Couldn't think of a nicer way to do this\n while ((match = exp.exec(this.text)) !== null) {\n this.tryAddBlock(exp, match, result)\n }\n }\n\n private tryAddBlock(exp: RegExp, match: RegExpExecArray, result: BlockFinderResult) {\n try {\n const from = match.index\n const to = match.index + match[0].length\n result.addBlock(from, to)\n } catch {\n throw new ArgumentError(\n `One or more block expressions result in a text sequence that overlaps. Current expression: ${exp}`\n )\n }\n }\n}\n","import Action from './Action'\nimport Match from './Match'\nimport MatchFinder from './MatchFinder'\nimport Operation from './Operation'\nimport Utils from './Utils'\nimport WordSplitter from './WordSplitter'\n\nexport default class HtmlDiff {\n /**\n * This value defines balance between speed and memory utilization. 
The higher it is the faster it works and more memory consumes.\n * @private\n */\n private static MatchGranularityMaximum = 4\n\n private static DelTag = 'del'\n private static InsTag = 'ins'\n\n // ignore case\n private static SpecialCaseClosingTags = [\n '</strong>',\n '</em>',\n '</b>',\n '</i>',\n '</big>',\n '</small>',\n '</u>',\n '</sub>',\n '</sup>',\n '</strike>',\n '</s>',\n '</span>',\n ]\n\n private static SpecialCaseClosingTagsSet = new Set([\n '</strong>',\n '</em>',\n '</b>',\n '</i>',\n '</big>',\n '</small>',\n '</u>',\n '</sub>',\n '</sup>',\n '</strike>',\n '</s>',\n '</span>',\n ])\n\n private static SpecialCaseOpeningTagRegex =\n /<((strong)|(b)|(i)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s)|(span))[>\\s]+/i\n\n private static FormattingTags = new Set([\n 'strong',\n 'em',\n 'b',\n 'i',\n 'big',\n 'small',\n 'u',\n 'sub',\n 'sup',\n 'strike',\n 's',\n 'span',\n ])\n\n private content: string[] = []\n private newText: string\n private oldText: string\n\n private specialTagDiffStack: string[] = []\n private newWords: string[] = []\n private oldWords: string[] = []\n /**\n * Content-only projections of oldWords/newWords (structural tags and adjacent whitespace removed).\n * When null, no structural normalization is applied (the word arrays are identical for diffing).\n */\n private oldContentWords: string[] | null = null\n private newContentWords: string[] | null = null\n /** Maps content-word index → original word index */\n private oldContentToOriginal: number[] | null = null\n private newContentToOriginal: number[] | null = null\n /** Tracks the last original old word index output, so equal operations can include leading structural tags */\n private lastOriginalOldOutputIndex = 0\n private matchGranularity = 0\n private blockExpressions: RegExp[] = []\n\n /**\n * Defines how to compare repeating words. 
Valid values are from 0 to 1.\n * This value allows to exclude some words from comparison that eventually\n * reduces the total time of the diff algorithm.\n * 0 means that all words are excluded so the diff will not find any matching words at all.\n * 1 (default value) means that all words participate in comparison so this is the most accurate case.\n * 0.5 means that any word that occurs more than 50% times may be excluded from comparison. This doesn't\n * mean that such words will definitely be excluded but only gives a permission to exclude them if necessary.\n */\n repeatingWordsAccuracy = 1.0\n\n /**\n * If true all whitespaces are considered as equal\n */\n ignoreWhitespaceDifferences = false\n\n /**\n * If some match is too small and located far from its neighbors then it is considered as orphan\n * and removed. For example:\n * <code>\n * aaaaa bb ccccccccc dddddd ee\n * 11111 bb 222222222 dddddd ee\n * </code>\n * will find two matches <code>bb</code> and <code>dddddd ee</code> but the first will be considered\n * as orphan and ignored, as result it will consider texts <code>aaaaa bb ccccccccc</code> and\n * <code>11111 bb 222222222</code> as single replacement:\n * <code>\n * &lt;del&gt;aaaaa bb ccccccccc&lt;/del&gt;&lt;ins&gt;11111 bb 222222222&lt;/ins&gt; dddddd ee\n * </code>\n * This property defines relative size of the match to be considered as orphan, from 0 to 1.\n * 1 means that all matches will be considered as orphans.\n * 0 (default) means that no match will be considered as orphan.\n * 0.2 means that if match length is less than 20% of distance between its neighbors it is considered as orphan.\n */\n orphanMatchThreshold = 0.0\n\n /**\n * Initializes a new instance of the class.\n * @param oldText The old text.\n * @param newText The new text.\n */\n constructor(oldText: string, newText: string) {\n this.oldText = oldText\n this.newText = newText\n }\n\n static execute(oldText: string, newText: string) {\n return new HtmlDiff(oldText, 
newText).build()\n }\n\n /**\n * Builds the HTML diff output\n * @return HTML diff markup\n */\n build(): string {\n // If there is no difference, don't bother checking for differences\n if (this.oldText === this.newText) {\n return this.newText\n }\n\n this.splitInputsToWords()\n this.buildContentProjections()\n\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? this.newWords\n\n this.matchGranularity = Math.min(\n HtmlDiff.MatchGranularityMaximum,\n Math.min(wordsForDiffOld.length, wordsForDiffNew.length)\n )\n\n const operations = this.operations()\n for (const op of operations) {\n this.performOperation(op)\n }\n\n return this.content.join('')\n }\n\n /**\n * Uses {@link expression} to group text together so that any change detected within the group is treated as a single block\n * @param expression\n */\n addBlockExpression(expression: RegExp) {\n this.blockExpressions.push(expression)\n }\n\n private splitInputsToWords() {\n this.oldWords = WordSplitter.convertHtmlToListOfWords(this.oldText, this.blockExpressions)\n\n // free memory, allow it for GC\n this.oldText = ''\n\n this.newWords = WordSplitter.convertHtmlToListOfWords(this.newText, this.blockExpressions)\n\n // free memory, allow it for GC\n this.newText = ''\n }\n\n /**\n * Checks whether the two word arrays have structural HTML differences (different non-formatting tags\n * or different whitespace between structural tags). 
When they do, builds \"content projections\" that\n * strip structural noise so the diff algorithm only sees meaningful content and formatting changes.\n */\n private buildContentProjections() {\n // Only use projections if the structural tags actually differ.\n // If structural tags are the same, the normal diff works fine and is simpler.\n if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) {\n return\n }\n\n const oldProjection = HtmlDiff.createContentProjection(this.oldWords)\n const newProjection = HtmlDiff.createContentProjection(this.newWords)\n\n // Don't activate structural normalization when one side has no content —\n // that's a genuine addition/deletion, not a re-wrapping scenario.\n if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) {\n return\n }\n\n this.oldContentWords = oldProjection.contentWords\n this.oldContentToOriginal = oldProjection.contentToOriginal\n this.newContentWords = newProjection.contentWords\n this.newContentToOriginal = newProjection.contentToOriginal\n }\n\n /**\n * Tags that commonly serve as content wrappers and may change structurally\n * without affecting the actual content. 
Only these tags are stripped during\n * structural normalization.\n */\n private static WrapperTags = new Set(['div', 'p', 'section', 'article', 'main', 'header', 'footer', 'aside', 'nav'])\n\n private static isStructuralTag(word: string): boolean {\n if (!Utils.isTag(word)) return false\n const tagName = Utils.getTagName(word)\n return HtmlDiff.WrapperTags.has(tagName)\n }\n\n /**\n * Returns true if words between structural tags are just whitespace (indentation).\n */\n private static isStructuralWhitespace(words: string[], index: number): boolean {\n if (!Utils.isWhiteSpace(words[index])) return false\n\n // Check if this whitespace is adjacent to a structural tag on either side\n const prevIsStructural = index === 0 || HtmlDiff.isStructuralTag(words[index - 1])\n const nextIsStructural = index === words.length - 1 || HtmlDiff.isStructuralTag(words[index + 1])\n return prevIsStructural || nextIsStructural\n }\n\n private static createContentProjection(words: string[]): {\n contentWords: string[]\n contentToOriginal: number[]\n } {\n const contentWords: string[] = []\n const contentToOriginal: number[] = []\n\n for (let i = 0; i < words.length; i++) {\n if (HtmlDiff.isStructuralTag(words[i])) continue\n if (HtmlDiff.isStructuralWhitespace(words, i)) continue\n contentWords.push(words[i])\n contentToOriginal.push(i)\n }\n\n return { contentWords, contentToOriginal }\n }\n\n private static hasStructuralDifferences(oldWords: string[], newWords: string[]): boolean {\n const oldStructural: string[] = []\n const newStructural: string[] = []\n\n // Compare only tag names (stripped of attributes) since structural normalization\n // is about wrapper tag name changes (e.g. 
<p> vs <div>), not attribute differences.\n // Attribute changes on the same tag name don't need projection-based normalization.\n for (const w of oldWords) {\n if (HtmlDiff.isStructuralTag(w)) {\n oldStructural.push(Utils.stripTagAttributes(w))\n }\n }\n for (const w of newWords) {\n if (HtmlDiff.isStructuralTag(w)) {\n newStructural.push(Utils.stripTagAttributes(w))\n }\n }\n\n if (oldStructural.length !== newStructural.length) return true\n for (let i = 0; i < oldStructural.length; i++) {\n if (oldStructural[i] !== newStructural[i]) return true\n }\n return false\n }\n\n private performOperation(operation: Operation) {\n switch (operation.action) {\n case Action.Equal:\n this.processEqualOperation(operation)\n break\n case Action.Delete:\n this.processDeleteOperation(operation, 'diffdel')\n break\n case Action.Insert:\n this.processInsertOperation(operation, 'diffins')\n break\n case Action.None:\n break\n case Action.Replace:\n this.processReplaceOperation(operation)\n break\n }\n }\n\n private processReplaceOperation(operation: Operation) {\n this.processDeleteOperation(operation, 'diffmod')\n this.processInsertOperation(operation, 'diffmod')\n }\n\n private processInsertOperation(operation: Operation, cssClass: string) {\n const words = this.oldContentWords\n ? this.getOriginalNewWords(operation.startInNew, operation.endInNew)\n : this.newWords.slice(operation.startInNew, operation.endInNew)\n this.insertTag(HtmlDiff.InsTag, cssClass, words)\n }\n\n private processDeleteOperation(operation: Operation, cssClass: string) {\n const words = this.oldContentWords\n ? 
this.getOriginalOldWords(operation.startInOld, operation.endInOld)\n : this.oldWords.slice(operation.startInOld, operation.endInOld)\n this.insertTag(HtmlDiff.DelTag, cssClass, words)\n\n // Advance the tracking index past the deleted range so subsequent equal operations\n // don't re-include structural tags from the deleted section.\n if (this.oldContentToOriginal && operation.endInOld > 0) {\n const lastDeletedOrigIdx = this.oldContentToOriginal[operation.endInOld - 1]\n this.lastOriginalOldOutputIndex = Math.max(this.lastOriginalOldOutputIndex, lastDeletedOrigIdx + 1)\n }\n }\n\n private processEqualOperation(operation: Operation) {\n if (this.oldContentWords) {\n // When using content projections, output from old original words to preserve old structure\n const result = this.getOriginalOldWordsWithStructure(operation.startInOld, operation.endInOld)\n this.content.push(result.join(''))\n } else {\n const result = this.newWords.slice(operation.startInNew, operation.endInNew)\n this.content.push(result.join(''))\n }\n }\n\n /**\n * Gets original old words for a content-index range, including only content and formatting tags\n * (used for delete/replace operations where we don't want structural tags).\n */\n private getOriginalOldWords(contentStart: number, contentEnd: number): string[] {\n if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd)\n const result: string[] = []\n for (let i = contentStart; i < contentEnd; i++) {\n result.push(this.oldWords[this.oldContentToOriginal[i]])\n }\n return result\n }\n\n /**\n * Gets original new words for a content-index range, including only content and formatting tags\n * (used for insert/replace operations where we don't want structural tags).\n */\n private getOriginalNewWords(contentStart: number, contentEnd: number): string[] {\n if (!this.newContentToOriginal) return this.newWords.slice(contentStart, contentEnd)\n const result: string[] = []\n for (let i = contentStart; i < contentEnd; 
i++) {\n result.push(this.newWords[this.newContentToOriginal[i]])\n }\n return result\n }\n\n /**\n * Gets original old words for a content-index range, INCLUDING structural tags and whitespace\n * between the content words (used for equal operations to preserve old HTML structure).\n */\n private getOriginalOldWordsWithStructure(contentStart: number, contentEnd: number): string[] {\n if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd)\n if (contentStart >= contentEnd) return []\n\n // Start from where we last left off in the original array (or the first content word's\n // original index, whichever is smaller) to include any structural tags that precede\n // the content words in this range.\n const firstContentOrigIdx = this.oldContentToOriginal[contentStart]\n const origStart = Math.min(this.lastOriginalOldOutputIndex, firstContentOrigIdx)\n\n // Include up to (but not including) the next content word's original index,\n // or to the end of oldWords if this is the last content range\n const origEnd =\n contentEnd < this.oldContentToOriginal.length ? this.oldContentToOriginal[contentEnd] : this.oldWords.length\n\n this.lastOriginalOldOutputIndex = origEnd\n return this.oldWords.slice(origStart, origEnd)\n }\n\n /**\n * This method encloses words within a specified tag (ins or del), and adds this into \"content\",\n * with a twist: if there are words contain tags, it actually creates multiple ins or del,\n * so that they don't include any ins or del. This handles cases like\n * old: '<p>a</p>'\n * new: '<p>ab</p>\n * <p>\n * c</b>'\n * diff result: '<p>a<ins>b</ins></p>\n * <p>\n * <ins>c</ins>\n * </p>\n * '\n * this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or\n * del tags), but handles correctly more cases than the earlier version.\n * P.S.: Spare a thought for people who write HTML browsers. They live in this ... 
every day.\n * @param tag\n * @param cssClass\n * @param words\n * @private\n */\n private insertTag(tag: string, cssClass: string, words: string[]) {\n while (true) {\n if (words.length === 0) {\n break\n }\n\n const allWordsUntilFirstTag = this.extractConsecutiveWords(words, x => !Utils.isTag(x))\n if (allWordsUntilFirstTag.length > 0) {\n const text = Utils.wrapText(allWordsUntilFirstTag.join(''), tag, cssClass)\n this.content.push(text)\n }\n\n const isInsertOpCompleted = words.length === 0\n if (isInsertOpCompleted) {\n break\n }\n\n // if there are still words left, they must start with a tag, but still can contain nonTag entries.\n // e.g. </span></big>bar\n // the remaining words need to be handled separately divided in a tagBlock, which definitely contains\n // at least one word and a potentially existing second block which starts with a nonTag but may\n // contain tags later on.\n const indexOfFirstNonTag = words.findIndex(x => !Utils.isTag(x))\n\n // if there are no nonTags, the whole block is a tagBlock and the index of the last tag is the last index of the block.\n // if there are nonTags, the index of the last tag is the index before the first nonTag.\n const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? 
words.length - 1 : indexOfFirstNonTag - 1\n\n let specialCaseTagInjection = ''\n let specialCaseTagInjectionIsBefore = false\n\n // handle opening tag\n if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {\n const tagNames = new Set<string>()\n for (const word of words) {\n if (Utils.isTag(word)) {\n tagNames.add(Utils.getTagName(word))\n }\n }\n const styledTagNames = Array.from(tagNames).join(' ')\n\n this.specialTagDiffStack.push(words[0])\n specialCaseTagInjection = `<ins class='mod ${styledTagNames}'>`\n if (tag === HtmlDiff.DelTag) {\n words.shift()\n\n // following tags may be formatting tags as well, follow through\n while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {\n words.shift()\n }\n }\n }\n // handle closing tag\n else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {\n const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop()\n // For delete operations: when the tag block contains a mix of formatting and\n // non-formatting closing tags (e.g. </strong></div>), compare against the first\n // closing tag (the formatting one) rather than the last tag in the block.\n // For purely formatting tag blocks (e.g. 
</i></strong>) or insert operations,\n // use the last tag as before to match against the outermost opening tag.\n let tagIndexToCompare = indexLastTagInFirstTagBlock\n if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {\n const hasNonFormattingClosingTag = words\n .slice(0, indexLastTagInFirstTagBlock + 1)\n .some(w => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))\n if (hasNonFormattingClosingTag) {\n tagIndexToCompare = 0\n }\n }\n const openingAndClosingTagsMatch =\n !!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])\n\n if (!!openingTag && openingAndClosingTagsMatch) {\n specialCaseTagInjection = '</ins>'\n specialCaseTagInjectionIsBefore = true\n }\n\n // if the tag has a corresponding opening tag, but they don't match,\n // we need to push the opening tag back onto the stack\n else if (openingTag) {\n this.specialTagDiffStack.push(openingTag)\n }\n\n if (tag === HtmlDiff.DelTag) {\n words.shift()\n // following tags may be formatting tags as well, follow through\n while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {\n words.shift()\n }\n }\n }\n\n if (words.length === 0 && specialCaseTagInjection.length === 0) {\n break\n }\n\n // For delete operations, only extract non-formatting tags. Formatting tags (special case\n // opening/closing tags) need to be handled in the next loop iteration so they go through\n // the proper specialTagDiffStack logic. Otherwise, opening formatting tags get output as\n // plain tags and their corresponding closing tags are later discarded, producing invalid HTML.\n const isTagForExtraction =\n tag === HtmlDiff.DelTag\n ? 
(x: string) =>\n Utils.isTag(x) &&\n !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) &&\n !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase())\n : Utils.isTag\n\n if (specialCaseTagInjectionIsBefore) {\n this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(''))\n } else {\n this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join('') + specialCaseTagInjection)\n }\n\n if (words.length === 0) continue\n\n // if there are still words left, they must start with a nonTag and need to be handled in the next iteration.\n this.insertTag(tag, cssClass, words)\n break\n }\n }\n\n private extractConsecutiveWords(words: string[], condition: (character: string) => boolean): string[] {\n let indexOfFirstTag: number | null = null\n for (let i = 0; i < words.length; i++) {\n const word = words[i]\n if (i === 0 && word === ' ') {\n words[i] = '&nbsp;'\n }\n if (!condition(word)) {\n indexOfFirstTag = i\n break\n }\n }\n\n if (indexOfFirstTag !== null) {\n const items = words.slice(0, indexOfFirstTag)\n if (indexOfFirstTag > 0) {\n words.splice(0, indexOfFirstTag)\n }\n return items\n }\n\n const items = words.slice(0)\n words.splice(0, words.length)\n return items\n }\n\n private operations(): Operation[] {\n let positionInOld = 0\n let positionInNew = 0\n const operations: Operation[] = []\n\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n const matches = this.matchingBlocks()\n matches.push(new Match(wordsForDiffOld.length, wordsForDiffNew.length, 0))\n\n //Remove orphans from matches.\n //If distance between left and right matches is 4 times longer than length of current match then it is considered as orphan\n const matchesWithoutOrphans = this.removeOrphans(matches)\n\n for (const match of matchesWithoutOrphans) {\n const matchStartsAtCurrentPositionInOld = positionInOld === match.startInOld\n const matchStartsAtCurrentPositionInNew = positionInNew === match.startInNew\n\n let action: Action\n\n if (!matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {\n action = Action.Replace\n } else if (matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {\n action = Action.Insert\n } else if (!matchStartsAtCurrentPositionInOld) {\n action = Action.Delete\n } // This occurs if the first few words are the same in both versions\n else {\n action = Action.None\n }\n\n if (action !== Action.None) {\n operations.push(new Operation(action, positionInOld, match.startInOld, positionInNew, match.startInNew))\n }\n\n if (match.size !== 0) {\n operations.push(new Operation(Action.Equal, match.startInOld, match.endInOld, match.startInNew, match.endInNew))\n }\n\n positionInOld = match.endInOld\n positionInNew = match.endInNew\n }\n\n return operations\n }\n\n private *removeOrphans(matches: Match[]) {\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n let prev: Match = new Match(0, 0, 0)\n let curr: Match | null = null\n\n for (const next of matches) {\n if (curr === null) {\n curr = next\n continue\n }\n\n if (\n (prev.endInOld === curr.startInOld && prev.endInNew === curr.startInNew) ||\n (curr.endInOld === next.startInOld && curr.endInNew === next.startInNew)\n ) {\n //if match has no diff on the left or on the right\n yield curr\n prev = curr\n curr = next\n continue\n }\n\n let oldDistanceInChars = 0\n for (let i = prev.endInOld; i < next.startInOld; i++) {\n oldDistanceInChars += wordsForDiffOld[i].length\n }\n let newDistanceInChars = 0\n for (let i = prev.endInNew; i < next.startInNew; i++) {\n newDistanceInChars += wordsForDiffNew[i].length\n }\n let currMatchLengthInChars = 0\n for (let i = curr.startInNew; i < curr.endInNew; i++) {\n currMatchLengthInChars += wordsForDiffNew[i].length\n }\n\n if (currMatchLengthInChars > Math.max(oldDistanceInChars, newDistanceInChars) * this.orphanMatchThreshold) {\n yield curr\n }\n\n prev = curr\n curr = next\n }\n\n if (curr !== null) {\n yield curr //assume that the last match is always vital\n }\n }\n\n private matchingBlocks(): Match[] {\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n const matchingBlocks: Match[] = []\n this.findMatchingBlocks(0, wordsForDiffOld.length, 0, wordsForDiffNew.length, matchingBlocks)\n return matchingBlocks\n }\n\n private findMatchingBlocks(\n startInOld: number,\n endInOld: number,\n startInNew: number,\n endInNew: number,\n matchingBlocks: Match[]\n ) {\n const match = this.findMatch(startInOld, endInOld, startInNew, endInNew)\n\n if (match !== null) {\n if (startInOld < match.startInOld && startInNew < match.startInNew) {\n this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks)\n }\n\n matchingBlocks.push(match)\n\n if (match.endInOld < endInOld && match.endInNew < endInNew) {\n this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks)\n }\n }\n }\n\n private findMatch(startInOld: number, endInOld: number, startInNew: number, endInNew: number): Match | null {\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n // For large texts it is more likely that there is a Match of size bigger than maximum granularity.\n // If not then go down and try to find it with smaller granularity.\n for (let i = this.matchGranularity; i > 0; i--) {\n const options = {\n blockSize: i,\n repeatingWordsAccuracy: this.repeatingWordsAccuracy,\n ignoreWhitespaceDifferences: this.ignoreWhitespaceDifferences,\n }\n const finder = new MatchFinder(\n wordsForDiffOld,\n wordsForDiffNew,\n startInOld,\n endInOld,\n startInNew,\n endInNew,\n options\n )\n const match = finder.findMatch()\n if (match !== null) return match\n }\n return null\n }\n}\n"],"mappings":";AAAA,IAAqB,QAArB,MAA2B;CACzB;CACA;CACA;CAEA,YAAY,YAAoB,YAAoB,MAAc;AAChE,OAAK,cAAc;AACnB,OAAK,cAAc;AACnB,OAAK,QAAQ;;CAGf,IAAI,aAAa;AACf,SAAO,KAAK;;CAGd,IAAI,aAAa;AACf,SAAO,KAAK;;CAGd,IAAI,OAAO;AACT,SAAO,KAAK;;CAGd,IAAI,WAAW;AACb,SAAO,KAAK,cAAc,KAAK;;CAGjC,IAAI,WAAW;AACb,SAAO,KAAK,cAAc,KAAK;;;;;AC5BnC,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,eAAe;AACrB,MAAM,kBAAkB;AACxB,MAAM,YAAY;AAClB,MAAM,WAAW;AAEjB,MAAM,sBAAyC,CAAC,OAAO;AAEvD,SAAgB,MAAM,MAAuB;AAC3C,KAAI,oBAAoB,MAAK,OAAM,MAAM,WAAW,GAAG,CAAC,CACtD,QAAO;AAGT,QAAO,aAAa,KAAK,IAAI,aAAa,KAAK;;AAGjD,SAAS,aAAa,MAAuB;AAC3C,QAAO,gBAAgB,KAAK,KAAK;;AAGnC,SAAS,aAAa,MAAuB;AAC3C,QAAO,mBAAmB,KAAK,KAAK;;AAGtC,SAAgB,mBAAmB,MAAsB;CACvD,MAAM,QAAQ,aAAa,KAAK,KAAK;AACrC,KAAI,MACF,QAAO,GAAG,MAAM,KAAK,KAAK,SAAS,KAAK,GAAG,OAAO;AAGpD,QAAO;;AAGT,SAAgB,SAAS,MAAc,SAAiB,UAA0B;AAChF,QAAO,IAAI,QAAQ,UAAU,SAAS,IAAI,KAAK,IAAI,QAAQ;;AAG7D,SAAgB,aAAa,KAAsB;AACjD,QAAO,QAAQ;;AAGjB,SAAgB,WAAW,KAAsB;AAC/C,QAAO,QAAQ;;AAGjB,SAAgB,gBAAgB,KAAsB;AACpD,QAAO,QAAQ;;AAGjB,SAAgB,cAAc,KAAsB;AAClD,QAAO,QAAQ;;AAGjB,SAAgB,aAAa,OAAwB;AACnD,QAAO,gBAAgB,KAAK,MAAM;;AAGpC,SAAgB,mBAAmB,MAAsB;AACvD,KAAI,MAAM,KAAK,CACb,QAAO,mBAAmB,KAAK;AAGjC,QAAO;;AAGT,SAAgB,OAAO,MAAuB;AAC5C,QAAO,UAAU,KAAK,KAAK;;AAG7B,SAAgB,WAAW,MAA6B;AACtD,KAAI,SAAS,KACX,QAAO;CAGT,MAAM,QAAQ,SAAS,KAAK,KAAK;AACjC,KAAI,MACF,QAAO,MAAM,QAAQ,KAAK,aAAa,IAAI,MAAM,GAAG,aAAa;AAGnE,QA
AO;;AAGT,IAAA,gBAAe;CACb;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;;;;;;ACxFD,IAAqB,cAArB,MAAqB,YAAY;CAC/B;CACA;CACA;CACA;CACA;CACA;CACA,cAAoD,EAAE;CACtD;CAEA,YACE,UACA,UACA,YACA,UACA,YACA,UACA,SACA;AACA,OAAK,WAAW;AAChB,OAAK,WAAW;AAChB,OAAK,aAAa;AAClB,OAAK,WAAW;AAChB,OAAK,aAAa;AAClB,OAAK,WAAW;AAChB,OAAK,UAAU;;CAGjB,gBAAwB;AACtB,OAAK,cAAc,EAAE;EACrB,MAAM,QAAkB,EAAE;AAC1B,OAAK,IAAI,IAAI,KAAK,YAAY,IAAI,KAAK,UAAU,KAAK;GAEpD,MAAM,OAAO,KAAK,kBAAkB,KAAK,SAAS,GAAG;GACrD,MAAM,MAAM,YAAY,WAAW,OAAO,MAAM,KAAK,QAAQ,UAAU;AAEvE,OAAI,QAAQ,KACV;AAGF,OAAI,CAAC,KAAK,YAAY,KACpB,MAAK,YAAY,OAAO,EAAE;AAE5B,QAAK,YAAY,KAAK,KAAK,EAAE;;;CAIjC,OAAe,WAAW,OAAiB,MAAc,WAAkC;AACzF,QAAM,KAAK,KAAK;AAEhB,MAAI,MAAM,SAAS,UACjB,OAAM,OAAO;AAGf,MAAI,MAAM,WAAW,UACnB,QAAO;AAGT,SAAO,MAAM,KAAK,GAAG;;CAGvB,kBAA0B,MAAsB;EAC9C,MAAM,SAASA,cAAM,mBAAmB,KAAK;AAC7C,MAAI,KAAK,QAAQ,+BAA+BA,cAAM,aAAa,OAAO,CACxE,QAAO;AAGT,SAAO;;CAGT,YAA0B;AACxB,OAAK,eAAe;AACpB,OAAK,sBAAsB;EAE3B,IAAI,aAAa;AACjB,OAAK,MAAM,QAAQ,KAAK,aAAa;AACnC,gBAAa;AACb;;AAEF,MAAI,CAAC,WACH,QAAO;EAGT,IAAI,iBAAiB,KAAK;EAC1B,IAAI,iBAAiB,KAAK;EAC1B,IAAI,gBAAgB;EAEpB,IAAI,gCAAqC,IAAI,KAAK;EAClD,MAAM,QAAkB,EAAE;AAE1B,OAAK,IAAI,aAAa,KAAK,YAAY,aAAa,KAAK,UAAU,cAAc;GAC/E,MAAM,OAAO,KAAK,kBAAkB,KAAK,SAAS,YAAY;GAC9D,MAAM,QAAQ,YAAY,WAAW,OAAO,MAAM,KAAK,QAAQ,UAAU;AAEzE,OAAI,UAAU,KACZ;GAGF,MAAM,mCAAwC,IAAI,KAAK;AAEvD,OAAI,CAAC,KAAK,YAAY,QAAQ;AAC5B,oBAAgB;AAChB;;AAGF,QAAK,MAAM,cAAc,KAAK,YAAY,QAAQ;IAEhD,MAAM,kBAAkB,cAAc,IAAI,aAAa,EAAE,GAAG,cAAc,IAAI,aAAa,EAAE,GAAI,KAAK;AACtG,qBAAiB,IAAI,YAAY,eAAe;AAEhD,QAAI,iBAAiB,eAAe;AAClC,sBAAiB,aAAa,iBAAiB,KAAK,QAAQ,YAAY;AACxE,sBAAiB,aAAa,iBAAiB,KAAK,QAAQ,YAAY;AACxE,qBAAgB;;;AAIpB,mBAAgB;;AAGlB,SAAO,kBAAkB,IACrB,IAAI,MAAM,gBAAgB,gBAAgB,gBAAgB,KAAK,QAAQ,YAAY,EAAE,GACrF;;;;;;;;CASN,uBAA+B;EAC7B,MAAM,YAAY,KAAK,SAAS,SAAS,KAAK,QAAQ;EACtD,MAAM,iBAAiB,OAAO,QAAQ,KAAK,YAAY,CACpD,QAAQ,GAAG,aAAa,QAAQ,SAAS,UAAU,CACnD,KAAK,CAAC,UAAU,KAAK;AAExB,OAAK,MAAM,KAAK,eACd,QAAO,KAAK,YAAY;;;;;AC/I9B,IAAqB,YAArB,MAA+B;CAC7B;CACA;CACA;CACA;CACA;CAEA,Y
AAY,QAAgB,YAAoB,UAAkB,YAAoB,UAAkB;AACtG,OAAK,SAAS;AACd,OAAK,aAAa;AAClB,OAAK,WAAW;AAChB,OAAK,aAAa;AAClB,OAAK,WAAW;;;;;ACXpB,IAAqB,eAArB,MAAqB,aAAa;CAChC;CACA;CACA;CACA;CACA,aAAqB;CACrB;CACA;CACA;CACA,OAAe,cAAc;CAE7B,IAAY,sBAAsB;AAChC,SAAO,KAAK,YAAY,SAAS;;CAGnC,YAAY,MAAc,kBAA4B;AACpD,OAAK,OAAO;AACZ,OAAK,iBAAiB,IAAI,YAAY,MAAM,iBAAiB,CAAC,YAAY;AAC1E,OAAK,uBAAuB,KAAK,eAAe;AAChD,OAAK,OAAA;AACL,OAAK,gBAAgB,aAAa;AAClC,OAAK,cAAc,EAAE;AACrB,OAAK,QAAQ,EAAE;;CAGjB,UAAoB;AAClB,OAAK,IAAI,QAAQ,GAAG,QAAQ,KAAK,KAAK,QAAQ,SAAS;GACrD,MAAM,YAAY,KAAK,KAAK,OAAO,MAAM;AACzC,QAAK,iBAAiB,OAAO,UAAU;;AAGzC,OAAK,0BAA0B;AAC/B,SAAO,KAAK;;CAGd,iBAAyB,OAAe,WAAmB;AACzD,MAAI,KAAK,WAAW,OAAO,UAAU,CACnC;AAGF,UAAQ,KAAK,MAAb;GACE,KAAA;AACE,SAAK,qBAAqB,UAAU;AACpC;GACF,KAAA;AACE,SAAK,2BAA2B,UAAU;AAC1C;GACF,KAAA;AACE,SAAK,8BAA8B,UAAU;AAC7C;GACF,KAAA;AACE,SAAK,0BAA0B,UAAU;AACzC;;;CAIN,0BAAkC,WAAmB;AACnD,MAAIC,cAAM,aAAa,UAAU,EAAE;AACjC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACI,UAAU,MAAM,CAAC,WAAW,GAAG;AACxC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,cAAc,UAAU,EAAE;GACzC,IAAI,mBAAmB;AACvB,OAAI,KAAK,qBAAqB;AAC5B,SAAK,YAAY,KAAK,UAAU;AAChC,SAAK,MAAM,KAAK,KAAK,YAAY,KAAK,GAAG,CAAC;AAG1C,QACE,KAAK,MAAM,SAAS,KACpBA,cAAM,aAAa,KAAK,MAAM,KAAK,MAAM,SAAS,GAAG,IACrDA,cAAM,aAAa,KAAK,MAAM,KAAK,MAAM,SAAS,GAAG,EACrD;KACA,MAAM,KAAK,KAAK,MAAM,KAAK,MAAM,SAAS;KAC1C,MAAM,KAAK,KAAK,MAAM,KAAK,MAAM,SAAS;AAC1C,UAAK,MAAM,OAAO,KAAK,MAAM,SAAS,GAAG,EAAE;AAC3C,UAAK,cAAc,GAAG,KAAK,KAAK,MAAM,GAAG;AACzC,UAAK,OAAA;AACL,wBAAmB;;;AAIvB,OAAI,kBAAkB;AACpB,SAAK,cAAc,EAAE;AACrB,SAAK,OAAA;;aAEEA,cAAM,OAAO,UAAU,CAChC,MAAK,YAAY,KAAK,UAAU;OAC3B;AACL,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;;;CAIT,8BAAsC,WAAmB;AACvD,MAAIA,cAAM,aAAa,UAAU,EAAE;AACjC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,gBAAgB,UAAU,EAAE;AAC3C,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,aAAa,UAAU,CACtC,MAAK,YAAY,KAAK,UAAU;OAC3B;AACL,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;;;CAIT,2B
AAmC,WAAmB;AACpD,MAAIA,cAAM,WAAW,UAAU,EAAE;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,0BAA0B;AAC/B,QAAK,OAAOA,cAAM,aAAa,UAAU,GAAA,IAAA;QAEzC,MAAK,YAAY,KAAK,UAAU;;CAIpC,qBAA6B,WAAmB;AAC9C,MAAIA,cAAM,aAAa,UAAU,EAAE;AACjC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,IAAI;AAC1B,QAAK,OAAA;aACIA,cAAM,gBAAgB,UAAU,EAAE;AAC3C,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,aAAa,UAAU,EAAE;AACxC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aAELA,cAAM,OAAO,UAAU,KACtB,KAAK,YAAY,WAAW,KAAKA,cAAM,OAAO,KAAK,YAAY,KAAK,YAAY,SAAS,GAAG,EAE7F,MAAK,YAAY,KAAK,UAAU;OAC3B;AACL,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;;;CAIpC,2BAAmC;AACjC,MAAI,KAAK,qBAAqB;AAC5B,QAAK,MAAM,KAAK,KAAK,YAAY,KAAK,GAAG,CAAC;AAC1C,QAAK,cAAc,EAAE;;;CAIzB,WAAmB,OAAe,WAA4B;AAC5D,MAAI,CAAC,KAAK,qBACR,QAAO;AAGT,MADkC,UAAU,KAAK,eAClB;AAC7B,QAAK,gBAAgB,aAAa;AAClC,QAAK,aAAa;AAClB,QAAK,0BAA0B;;EAGjC,MAAM,QAAQ,KAAK,eAAe,UAAU,MAAM;AAClD,MAAI,OAAO;AACT,QAAK,aAAa;AAClB,QAAK,gBAAgB;;AAEvB,MAAI,KAAK,YAAY;AACnB,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;;AAEP,SAAO,KAAK;;CAGd,OAAO,yBAAyB,MAAc,kBAAsC;AAClF,SAAO,IAAI,aAAa,MAAM,iBAAiB,CAAC,SAAS;;;AAI7D,IAAM,oBAAN,MAAwB;CACtB,yBAAsC,IAAI,KAAK;CAE/C,SAAS,MAAc,IAAY;AACjC,MAAI,KAAK,OAAO,IAAI,KAAK,CACvB,OAAM,IAAI,cAAc,yEAAyE;AAGnG,OAAK,OAAO,IAAI,MAAM,GAAG;;CAG3B,UAAU,UAAiC;AACzC,SAAO,KAAK,OAAO,IAAI,SAAS,IAAI;;CAGtC,IAAI,YAAY;AACd,SAAO,KAAK,OAAO,OAAO;;;AAI9B,IAAM,gBAAN,cAA4B,MAAM;AAElC,IAAM,cAAN,MAAkB;CAChB;CACA;CAEA,YAAY,MAAc,kBAA4B;AACpD,OAAK,OAAO;AACZ,OAAK,mBAAmB;;CAG1B,aAAgC;EAC9B,MAAM,SAAS,IAAI,mBAAmB;AACtC,OAAK,MAAM,cAAc,KAAK,iBAC5B,MAAK,oBAAoB,YAAY,OAAO;AAE9C,SAAO;;CAGT,oBAA4B,KAAa,QAA2B;EAClE,IAAI;AAEJ,UAAQ,QAAQ,IAAI,KAAK,KAAK,KAAK,MAAM,KACvC,MAAK,YAAY,KAAK,OAAO,OAAO;;CAIxC,YAAoB,KAAa,OAAwB,QAA2B;AAClF,MAAI;GACF,MAAM,OAAO,MAAM;GACnB,MAAM,KAAK,MAAM,QAAQ,MAAM,GAAG;AAClC,UAAO,SAAS,MAAM,GAAG;UACnB;AACN,SAAM,IAAI,cACR,8FAA8F,MAC/F;;;;;;AC7OP,IAAqB,WAArB,MAAqB,SAAS;;;;;CAK5B,OAAe,0BAA0B;CAEzC,OAAe,SAAS;CACxB,OAAe,SAAS;CAGxB,OAAe,yBAAyB;EACtC;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;E
ACA;EACD;CAED,OAAe,4BAA4B,IAAI,IAAI;EACjD;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACD,CAAC;CAEF,OAAe,6BACb;CAEF,OAAe,iBAAiB,IAAI,IAAI;EACtC;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACD,CAAC;CAEF,UAA4B,EAAE;CAC9B;CACA;CAEA,sBAAwC,EAAE;CAC1C,WAA6B,EAAE;CAC/B,WAA6B,EAAE;;;;;CAK/B,kBAA2C;CAC3C,kBAA2C;;CAE3C,uBAAgD;CAChD,uBAAgD;;CAEhD,6BAAqC;CACrC,mBAA2B;CAC3B,mBAAqC,EAAE;;;;;;;;;;CAWvC,yBAAyB;;;;CAKzB,8BAA8B;;;;;;;;;;;;;;;;;;;CAoB9B,uBAAuB;;;;;;CAOvB,YAAY,SAAiB,SAAiB;AAC5C,OAAK,UAAU;AACf,OAAK,UAAU;;CAGjB,OAAO,QAAQ,SAAiB,SAAiB;AAC/C,SAAO,IAAI,SAAS,SAAS,QAAQ,CAAC,OAAO;;;;;;CAO/C,QAAgB;AAEd,MAAI,KAAK,YAAY,KAAK,QACxB,QAAO,KAAK;AAGd,OAAK,oBAAoB;AACzB,OAAK,yBAAyB;EAE9B,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;AAErD,OAAK,mBAAmB,KAAK,IAC3B,SAAS,yBACT,KAAK,IAAI,gBAAgB,QAAQ,gBAAgB,OAAO,CACzD;EAED,MAAM,aAAa,KAAK,YAAY;AACpC,OAAK,MAAM,MAAM,WACf,MAAK,iBAAiB,GAAG;AAG3B,SAAO,KAAK,QAAQ,KAAK,GAAG;;;;;;CAO9B,mBAAmB,YAAoB;AACrC,OAAK,iBAAiB,KAAK,WAAW;;CAGxC,qBAA6B;AAC3B,OAAK,WAAW,aAAa,yBAAyB,KAAK,SAAS,KAAK,iBAAiB;AAG1F,OAAK,UAAU;AAEf,OAAK,WAAW,aAAa,yBAAyB,KAAK,SAAS,KAAK,iBAAiB;AAG1F,OAAK,UAAU;;;;;;;CAQjB,0BAAkC;AAGhC,MAAI,CAAC,SAAS,yBAAyB,KAAK,UAAU,KAAK,SAAS,CAClE;EAGF,MAAM,gBAAgB,SAAS,wBAAwB,KAAK,SAAS;EACrE,MAAM,gBAAgB,SAAS,wBAAwB,KAAK,SAAS;AAIrE,MAAI,cAAc,aAAa,WAAW,KAAK,cAAc,aAAa,WAAW,EACnF;AAGF,OAAK,kBAAkB,cAAc;AACrC,OAAK,uBAAuB,cAAc;AAC1C,OAAK,kBAAkB,cAAc;AACrC,OAAK,uBAAuB,cAAc;;;;;;;CAQ5C,OAAe,cAAc,IAAI,IAAI;EAAC;EAAO;EAAK;EAAW;EAAW;EAAQ;EAAU;EAAU;EAAS;EAAM,CAAC;CAEpH,OAAe,gBAAgB,MAAuB;AACpD,MAAI,CAACC,cAAM,MAAM,KAAK,CAAE,QAAO;EAC/B,MAAM,UAAUA,cAAM,WAAW,KAAK;AACtC,SAAO,SAAS,YAAY,IAAI,QAAQ;;;;;CAM1C,OAAe,uBAAuB,OAAiB,OAAwB;AAC7E,MAAI,CAACA,cAAM,aAAa,MAAM,OAAO,CAAE,QAAO;EAG9C,MAAM,mBAAmB,UAAU,KAAK,SAAS,gBAAgB,MAAM,QAAQ,GAAG;EAClF,MAAM,mBAAmB,UAAU,MAAM,SAAS,KAAK,SAAS,gBAAgB,MAAM,QAAQ,GAAG;AACjG,SAAO,oBAAoB;;CAG7B,OAAe,wBAAwB,OAGrC;EACA,MAAM,eAAyB,EAAE;EACjC,MAAM,oBAA8B,EAAE;AAEtC,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,OAAI,SAAS,
gBAAgB,MAAM,GAAG,CAAE;AACxC,OAAI,SAAS,uBAAuB,OAAO,EAAE,CAAE;AAC/C,gBAAa,KAAK,MAAM,GAAG;AAC3B,qBAAkB,KAAK,EAAE;;AAG3B,SAAO;GAAE;GAAc;GAAmB;;CAG5C,OAAe,yBAAyB,UAAoB,UAA6B;EACvF,MAAM,gBAA0B,EAAE;EAClC,MAAM,gBAA0B,EAAE;AAKlC,OAAK,MAAM,KAAK,SACd,KAAI,SAAS,gBAAgB,EAAE,CAC7B,eAAc,KAAKA,cAAM,mBAAmB,EAAE,CAAC;AAGnD,OAAK,MAAM,KAAK,SACd,KAAI,SAAS,gBAAgB,EAAE,CAC7B,eAAc,KAAKA,cAAM,mBAAmB,EAAE,CAAC;AAInD,MAAI,cAAc,WAAW,cAAc,OAAQ,QAAO;AAC1D,OAAK,IAAI,IAAI,GAAG,IAAI,cAAc,QAAQ,IACxC,KAAI,cAAc,OAAO,cAAc,GAAI,QAAO;AAEpD,SAAO;;CAGT,iBAAyB,WAAsB;AAC7C,UAAQ,UAAU,QAAlB;GACE,KAAA;AACE,SAAK,sBAAsB,UAAU;AACrC;GACF,KAAA;AACE,SAAK,uBAAuB,WAAW,UAAU;AACjD;GACF,KAAA;AACE,SAAK,uBAAuB,WAAW,UAAU;AACjD;GACF,KAAA,EACE;GACF,KAAA;AACE,SAAK,wBAAwB,UAAU;AACvC;;;CAIN,wBAAgC,WAAsB;AACpD,OAAK,uBAAuB,WAAW,UAAU;AACjD,OAAK,uBAAuB,WAAW,UAAU;;CAGnD,uBAA+B,WAAsB,UAAkB;EACrE,MAAM,QAAQ,KAAK,kBACf,KAAK,oBAAoB,UAAU,YAAY,UAAU,SAAS,GAClE,KAAK,SAAS,MAAM,UAAU,YAAY,UAAU,SAAS;AACjE,OAAK,UAAU,SAAS,QAAQ,UAAU,MAAM;;CAGlD,uBAA+B,WAAsB,UAAkB;EACrE,MAAM,QAAQ,KAAK,kBACf,KAAK,oBAAoB,UAAU,YAAY,UAAU,SAAS,GAClE,KAAK,SAAS,MAAM,UAAU,YAAY,UAAU,SAAS;AACjE,OAAK,UAAU,SAAS,QAAQ,UAAU,MAAM;AAIhD,MAAI,KAAK,wBAAwB,UAAU,WAAW,GAAG;GACvD,MAAM,qBAAqB,KAAK,qBAAqB,UAAU,WAAW;AAC1E,QAAK,6BAA6B,KAAK,IAAI,KAAK,4BAA4B,qBAAqB,EAAE;;;CAIvG,sBAA8B,WAAsB;AAClD,MAAI,KAAK,iBAAiB;GAExB,MAAM,SAAS,KAAK,iCAAiC,UAAU,YAAY,UAAU,SAAS;AAC9F,QAAK,QAAQ,KAAK,OAAO,KAAK,GAAG,CAAC;SAC7B;GACL,MAAM,SAAS,KAAK,SAAS,MAAM,UAAU,YAAY,UAAU,SAAS;AAC5E,QAAK,QAAQ,KAAK,OAAO,KAAK,GAAG,CAAC;;;;;;;CAQtC,oBAA4B,cAAsB,YAA8B;AAC9E,MAAI,CAAC,KAAK,qBAAsB,QAAO,KAAK,SAAS,MAAM,cAAc,WAAW;EACpF,MAAM,SAAmB,EAAE;AAC3B,OAAK,IAAI,IAAI,cAAc,IAAI,YAAY,IACzC,QAAO,KAAK,KAAK,SAAS,KAAK,qBAAqB,IAAI;AAE1D,SAAO;;;;;;CAOT,oBAA4B,cAAsB,YAA8B;AAC9E,MAAI,CAAC,KAAK,qBAAsB,QAAO,KAAK,SAAS,MAAM,cAAc,WAAW;EACpF,MAAM,SAAmB,EAAE;AAC3B,OAAK,IAAI,IAAI,cAAc,IAAI,YAAY,IACzC,QAAO,KAAK,KAAK,SAAS,KAAK,qBAAqB,IAAI;AAE1D,SAAO;;;;;;CAOT,iCAAyC,cAAsB,YAA8B;AAC3F,MAAI,CAAC,KAAK,qBAAsB,QAAO,KAAK,SAAS,MAAM,cAAc,WAAW;AACpF,MAAI,gBAAgB,WAAY
,QAAO,EAAE;EAKzC,MAAM,sBAAsB,KAAK,qBAAqB;EACtD,MAAM,YAAY,KAAK,IAAI,KAAK,4BAA4B,oBAAoB;EAIhF,MAAM,UACJ,aAAa,KAAK,qBAAqB,SAAS,KAAK,qBAAqB,cAAc,KAAK,SAAS;AAExG,OAAK,6BAA6B;AAClC,SAAO,KAAK,SAAS,MAAM,WAAW,QAAQ;;;;;;;;;;;;;;;;;;;;;;;CAwBhD,UAAkB,KAAa,UAAkB,OAAiB;AAChE,SAAO,MAAM;AACX,OAAI,MAAM,WAAW,EACnB;GAGF,MAAM,wBAAwB,KAAK,wBAAwB,QAAO,MAAK,CAACA,cAAM,MAAM,EAAE,CAAC;AACvF,OAAI,sBAAsB,SAAS,GAAG;IACpC,MAAM,OAAOA,cAAM,SAAS,sBAAsB,KAAK,GAAG,EAAE,KAAK,SAAS;AAC1E,SAAK,QAAQ,KAAK,KAAK;;AAIzB,OAD4B,MAAM,WAAW,EAE3C;GAQF,MAAM,qBAAqB,MAAM,WAAU,MAAK,CAACA,cAAM,MAAM,EAAE,CAAC;GAIhE,MAAM,8BAA8B,uBAAuB,KAAK,MAAM,SAAS,IAAI,qBAAqB;GAExG,IAAI,0BAA0B;GAC9B,IAAI,kCAAkC;AAGtC,OAAI,SAAS,2BAA2B,KAAK,MAAM,GAAG,EAAE;IACtD,MAAM,2BAAW,IAAI,KAAa;AAClC,SAAK,MAAM,QAAQ,MACjB,KAAIA,cAAM,MAAM,KAAK,CACnB,UAAS,IAAIA,cAAM,WAAW,KAAK,CAAC;IAGxC,MAAM,iBAAiB,MAAM,KAAK,SAAS,CAAC,KAAK,IAAI;AAErD,SAAK,oBAAoB,KAAK,MAAM,GAAG;AACvC,8BAA0B,mBAAmB,eAAe;AAC5D,QAAI,QAAQ,SAAS,QAAQ;AAC3B,WAAM,OAAO;AAGb,YAAO,MAAM,SAAS,KAAK,SAAS,2BAA2B,KAAK,MAAM,GAAG,CAC3E,OAAM,OAAO;;cAKV,SAAS,0BAA0B,IAAI,MAAM,GAAG,aAAa,CAAC,EAAE;IACvE,MAAM,aAAa,KAAK,oBAAoB,WAAW,IAAI,OAAO,KAAK,oBAAoB,KAAK;IAMhG,IAAI,oBAAoB;AACxB,QAAI,QAAQ,SAAS,UAAU,uBAAuB;SACjB,MAChC,MAAM,GAAG,8BAA8B,EAAE,CACzC,MAAK,MAAK,CAAC,SAAS,0BAA0B,IAAI,EAAE,aAAa,CAAC,CAAC,CAEpE,qBAAoB;;IAGxB,MAAM,6BACJ,CAAC,CAAC,cAAcA,cAAM,WAAW,WAAW,KAAKA,cAAM,WAAW,MAAM,mBAAmB;AAE7F,QAAI,CAAC,CAAC,cAAc,4BAA4B;AAC9C,+BAA0B;AAC1B,uCAAkC;eAK3B,WACP,MAAK,oBAAoB,KAAK,WAAW;AAG3C,QAAI,QAAQ,SAAS,QAAQ;AAC3B,WAAM,OAAO;AAEb,YAAO,MAAM,SAAS,KAAK,SAAS,0BAA0B,IAAI,MAAM,GAAG,aAAa,CAAC,CACvF,OAAM,OAAO;;;AAKnB,OAAI,MAAM,WAAW,KAAK,wBAAwB,WAAW,EAC3D;GAOF,MAAM,qBACJ,QAAQ,SAAS,UACZ,MACCA,cAAM,MAAM,EAAE,IACd,CAAC,SAAS,2BAA2B,KAAK,EAAE,IAC5C,CAAC,SAAS,0BAA0B,IAAI,EAAE,aAAa,CAAC,GAC1DA,cAAM;AAEZ,OAAI,gCACF,MAAK,QAAQ,KAAK,0BAA0B,KAAK,wBAAwB,OAAO,mBAAmB,CAAC,KAAK,GAAG,CAAC;OAE7G,MAAK,QAAQ,KAAK,KAAK,wBAAwB,OAAO,mBAAmB,CAAC,KAAK,GAAG,GAAG,wBAAwB;AAG/G,OAAI,MAAM,WAAW,EAAG;AAGxB,QAAK,UAAU,KAAK,UAAU,MAAM;AACpC;;;CAIJ,wBAAgC,OAAiB,WA
AqD;EACpG,IAAI,kBAAiC;AACrC,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,OAAO,MAAM;AACnB,OAAI,MAAM,KAAK,SAAS,IACtB,OAAM,KAAK;AAEb,OAAI,CAAC,UAAU,KAAK,EAAE;AACpB,sBAAkB;AAClB;;;AAIJ,MAAI,oBAAoB,MAAM;GAC5B,MAAM,QAAQ,MAAM,MAAM,GAAG,gBAAgB;AAC7C,OAAI,kBAAkB,EACpB,OAAM,OAAO,GAAG,gBAAgB;AAElC,UAAO;;EAGT,MAAM,QAAQ,MAAM,MAAM,EAAE;AAC5B,QAAM,OAAO,GAAG,MAAM,OAAO;AAC7B,SAAO;;CAGT,aAAkC;EAChC,IAAI,gBAAgB;EACpB,IAAI,gBAAgB;EACpB,MAAM,aAA0B,EAAE;EAElC,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EAErD,MAAM,UAAU,KAAK,gBAAgB;AACrC,UAAQ,KAAK,IAAI,MAAM,gBAAgB,QAAQ,gBAAgB,QAAQ,EAAE,CAAC;EAI1E,MAAM,wBAAwB,KAAK,cAAc,QAAQ;AAEzD,OAAK,MAAM,SAAS,uBAAuB;GACzC,MAAM,oCAAoC,kBAAkB,MAAM;GAClE,MAAM,oCAAoC,kBAAkB,MAAM;GAElE,IAAI;AAEJ,OAAI,CAAC,qCAAqC,CAAC,kCACzC,UAAA;YACS,qCAAqC,CAAC,kCAC/C,UAAA;YACS,CAAC,kCACV,UAAA;OAGA,UAAA;AAGF,OAAI,WAAA,EACF,YAAW,KAAK,IAAI,UAAU,QAAQ,eAAe,MAAM,YAAY,eAAe,MAAM,WAAW,CAAC;AAG1G,OAAI,MAAM,SAAS,EACjB,YAAW,KAAK,IAAI,UAAA,GAAwB,MAAM,YAAY,MAAM,UAAU,MAAM,YAAY,MAAM,SAAS,CAAC;AAGlH,mBAAgB,MAAM;AACtB,mBAAgB,MAAM;;AAGxB,SAAO;;CAGT,CAAS,cAAc,SAAkB;EACvC,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EAErD,IAAI,OAAc,IAAI,MAAM,GAAG,GAAG,EAAE;EACpC,IAAI,OAAqB;AAEzB,OAAK,MAAM,QAAQ,SAAS;AAC1B,OAAI,SAAS,MAAM;AACjB,WAAO;AACP;;AAGF,OACG,KAAK,aAAa,KAAK,cAAc,KAAK,aAAa,KAAK,cAC5D,KAAK,aAAa,KAAK,cAAc,KAAK,aAAa,KAAK,YAC7D;AAEA,UAAM;AACN,WAAO;AACP,WAAO;AACP;;GAGF,IAAI,qBAAqB;AACzB,QAAK,IAAI,IAAI,KAAK,UAAU,IAAI,KAAK,YAAY,IAC/C,uBAAsB,gBAAgB,GAAG;GAE3C,IAAI,qBAAqB;AACzB,QAAK,IAAI,IAAI,KAAK,UAAU,IAAI,KAAK,YAAY,IAC/C,uBAAsB,gBAAgB,GAAG;GAE3C,IAAI,yBAAyB;AAC7B,QAAK,IAAI,IAAI,KAAK,YAAY,IAAI,KAAK,UAAU,IAC/C,2BAA0B,gBAAgB,GAAG;AAG/C,OAAI,yBAAyB,KAAK,IAAI,oBAAoB,mBAAmB,GAAG,KAAK,qBACnF,OAAM;AAGR,UAAO;AACP,UAAO;;AAGT,MAAI,SAAS,KACX,OAAM;;CAIV,iBAAkC;EAChC,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,iBAA0B,EAAE;AAClC,OAAK,mBAAmB,GAAG,gBAAgB,QAAQ,GAAG,gBAAgB,QAAQ,eAAe;AAC7F,SAAO;;CAGT,mBACE,YACA,UACA,YACA,UACA,gBACA;EACA,MAAM,QAA
Q,KAAK,UAAU,YAAY,UAAU,YAAY,SAAS;AAExE,MAAI,UAAU,MAAM;AAClB,OAAI,aAAa,MAAM,cAAc,aAAa,MAAM,WACtD,MAAK,mBAAmB,YAAY,MAAM,YAAY,YAAY,MAAM,YAAY,eAAe;AAGrG,kBAAe,KAAK,MAAM;AAE1B,OAAI,MAAM,WAAW,YAAY,MAAM,WAAW,SAChD,MAAK,mBAAmB,MAAM,UAAU,UAAU,MAAM,UAAU,UAAU,eAAe;;;CAKjG,UAAkB,YAAoB,UAAkB,YAAoB,UAAgC;EAC1G,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;AAIrD,OAAK,IAAI,IAAI,KAAK,kBAAkB,IAAI,GAAG,KAAK;GAe9C,MAAM,QATS,IAAI,YACjB,iBACA,iBACA,YACA,UACA,YACA,UAXc;IACd,WAAW;IACX,wBAAwB,KAAK;IAC7B,6BAA6B,KAAK;IACnC,CASA,CACoB,WAAW;AAChC,OAAI,UAAU,KAAM,QAAO;;AAE7B,SAAO"}
1
+ {"version":3,"file":"HtmlDiff.cjs","names":["Utils","Utils","Utils"],"sources":["../src/Match.ts","../src/Utils.ts","../src/MatchFinder.ts","../src/Operation.ts","../src/WordSplitter.ts","../src/HtmlDiff.ts"],"sourcesContent":["export default class Match {\n private _startInOld: number\n private _startInNew: number\n private _size: number\n\n constructor(startInOld: number, startInNew: number, size: number) {\n this._startInOld = startInOld\n this._startInNew = startInNew\n this._size = size\n }\n\n get startInOld() {\n return this._startInOld\n }\n\n get startInNew() {\n return this._startInNew\n }\n\n get size() {\n return this._size\n }\n\n get endInOld() {\n return this._startInOld + this._size\n }\n\n get endInNew() {\n return this._startInNew + this._size\n }\n}\n","const openingTagRegex = /^\\s*<[^>]+>\\s*$/\nconst closingTagTexRegex = /^\\s*<\\/[^>]+>\\s*$/\nconst tagWordRegex = /<[^\\s>]+/\nconst whitespaceRegex = /^(\\s|&nbsp;)+$/\nconst wordRegex = /[\\w#@]+/\nconst tagRegex = /<\\/?(?<name>[^\\s/>]+)[^>]*>/\n\nconst SpecialCaseWordTags: readonly string[] = ['<img']\n\nexport function isTag(item: string): boolean {\n if (SpecialCaseWordTags.some(re => item?.startsWith(re))) {\n return false\n }\n\n return isOpeningTag(item) || isClosingTag(item)\n}\n\nfunction isOpeningTag(item: string): boolean {\n return openingTagRegex.test(item)\n}\n\nfunction isClosingTag(item: string): boolean {\n return closingTagTexRegex.test(item)\n}\n\nexport function stripTagAttributes(word: string): string {\n const match = tagWordRegex.exec(word)\n if (match) {\n return `${match[0]}${word.endsWith('/>') ? 
'/>' : '>'}`\n }\n\n return word\n}\n\nexport function wrapText(text: string, tagName: string, cssClass: string): string {\n return `<${tagName} class='${cssClass}'>${text}</${tagName}>`\n}\n\nexport function isStartOfTag(val: string): boolean {\n return val === '<'\n}\n\nexport function isEndOfTag(val: string): boolean {\n return val === '>'\n}\n\nexport function isStartOfEntity(val: string): boolean {\n return val === '&'\n}\n\nexport function isEndOfEntity(val: string): boolean {\n return val === ';'\n}\n\nexport function isWhiteSpace(value: string): boolean {\n return whitespaceRegex.test(value)\n}\n\nexport function stripAnyAttributes(word: string): string {\n if (isTag(word)) {\n return stripTagAttributes(word)\n }\n\n return word\n}\n\nexport function isWord(text: string): boolean {\n return wordRegex.test(text)\n}\n\nexport function getTagName(word: string | null): string {\n if (word === null) {\n return ''\n }\n\n const match = tagRegex.exec(word)\n if (match) {\n return match.groups?.name.toLowerCase() ?? match[1].toLowerCase()\n }\n\n return ''\n}\n\nexport default {\n isTag,\n stripTagAttributes,\n wrapText,\n isStartOfTag,\n isEndOfTag,\n isStartOfEntity,\n isEndOfEntity,\n isWhiteSpace,\n stripAnyAttributes,\n isWord,\n getTagName,\n}\n","import Match from './Match'\nimport type MatchOptions from './MatchOptions'\nimport Utils from './Utils'\n\n/**\n * Finds the longest match in given texts. 
It uses indexing with fixed granularity that is used to compare blocks of text.\n */\nexport default class MatchFinder {\n private oldWords: string[]\n private newWords: string[]\n private startInOld: number\n private endInOld: number\n private startInNew: number\n private endInNew: number\n private wordIndices: { [word: string]: number[] } = {}\n private options: MatchOptions\n\n constructor(\n oldWords: string[],\n newWords: string[],\n startInOld: number,\n endInOld: number,\n startInNew: number,\n endInNew: number,\n options: MatchOptions\n ) {\n this.oldWords = oldWords\n this.newWords = newWords\n this.startInOld = startInOld\n this.endInOld = endInOld\n this.startInNew = startInNew\n this.endInNew = endInNew\n this.options = options\n }\n\n private indexNewWords() {\n this.wordIndices = {}\n const block: string[] = []\n for (let i = this.startInNew; i < this.endInNew; i++) {\n // if word is a tag, we should ignore attributes as attribute changes are not supported (yet)\n const word = this.normalizeForIndex(this.newWords[i])\n const key = MatchFinder.putNewWord(block, word, this.options.blockSize)\n\n if (key === null) {\n continue\n }\n\n if (!this.wordIndices[key]) {\n this.wordIndices[key] = []\n }\n this.wordIndices[key].push(i)\n }\n }\n\n private static putNewWord(block: string[], word: string, blockSize: number): string | null {\n block.push(word)\n\n if (block.length > blockSize) {\n block.shift()\n }\n\n if (block.length !== blockSize) {\n return null\n }\n\n return block.join('')\n }\n\n private normalizeForIndex(word: string): string {\n const output = Utils.stripAnyAttributes(word)\n if (this.options.ignoreWhitespaceDifferences && Utils.isWhiteSpace(output)) {\n return ' '\n }\n\n return output\n }\n\n findMatch(): Match | null {\n this.indexNewWords()\n this.removeRepeatingWords()\n\n let hasIndices = false\n for (const _key in this.wordIndices) {\n hasIndices = true\n break\n }\n if (!hasIndices) {\n return null\n }\n\n let bestMatchInOld = 
this.startInOld\n let bestMatchInNew = this.startInNew\n let bestMatchSize = 0\n\n let matchLengthAt: Map<number, number> = new Map()\n const block: string[] = []\n\n for (let indexInOld = this.startInOld; indexInOld < this.endInOld; indexInOld++) {\n const word = this.normalizeForIndex(this.oldWords[indexInOld])\n const index = MatchFinder.putNewWord(block, word, this.options.blockSize)\n\n if (index === null) {\n continue\n }\n\n const newMatchLengthAt: Map<number, number> = new Map()\n\n if (!this.wordIndices[index]) {\n matchLengthAt = newMatchLengthAt\n continue\n }\n\n for (const indexInNew of this.wordIndices[index]) {\n // biome-ignore lint/style/noNonNullAssertion: This is safe as guarded by has()\n const newMatchLength = (matchLengthAt.has(indexInNew - 1) ? matchLengthAt.get(indexInNew - 1)! : 0) + 1\n newMatchLengthAt.set(indexInNew, newMatchLength)\n\n if (newMatchLength > bestMatchSize) {\n bestMatchInOld = indexInOld - newMatchLength - this.options.blockSize + 2\n bestMatchInNew = indexInNew - newMatchLength - this.options.blockSize + 2\n bestMatchSize = newMatchLength\n }\n }\n\n matchLengthAt = newMatchLengthAt\n }\n\n return bestMatchSize !== 0\n ? new Match(bestMatchInOld, bestMatchInNew, bestMatchSize + this.options.blockSize - 1)\n : null\n }\n\n /**\n * This method removes words that occur too many times. This way it reduces total count of comparison operations\n * and as result the diff algorithm takes less time. 
But the side effect is that it may detect false differences of\n * the repeating words.\n * @private\n */\n private removeRepeatingWords() {\n const threshold = this.newWords.length * this.options.repeatingWordsAccuracy\n const repeatingWords = Object.entries(this.wordIndices)\n .filter(([, indices]) => indices.length > threshold)\n .map(([word]) => word)\n\n for (const w of repeatingWords) {\n delete this.wordIndices[w]\n }\n }\n}\n","import type Action from './Action'\n\nexport default class Operation {\n action: Action\n startInOld: number\n endInOld: number\n startInNew: number\n endInNew: number\n\n constructor(action: Action, startInOld: number, endInOld: number, startInNew: number, endInNew: number) {\n this.action = action\n this.startInOld = startInOld\n this.endInOld = endInOld\n this.startInNew = startInNew\n this.endInNew = endInNew\n }\n}\n","import Mode from './Mode'\nimport Utils from './Utils'\n\nexport default class WordSplitter {\n private text: string\n private isBlockCheckRequired: boolean\n private blockLocations: BlockFinderResult\n private mode: Mode\n private isGrouping = false\n private globbingUntil: number\n private currentWord: string[]\n private words: string[]\n private static NotGlobbing = -1\n\n private get currentWordHasChars() {\n return this.currentWord.length > 0\n }\n\n constructor(text: string, blockExpressions: RegExp[]) {\n this.text = text\n this.blockLocations = new BlockFinder(text, blockExpressions).findBlocks()\n this.isBlockCheckRequired = this.blockLocations.hasBlocks\n this.mode = Mode.Character\n this.globbingUntil = WordSplitter.NotGlobbing\n this.currentWord = []\n this.words = []\n }\n\n process(): string[] {\n for (let index = 0; index < this.text.length; index++) {\n const character = this.text.charAt(index)\n this.processCharacter(index, character)\n }\n\n this.appendCurrentWordToWords()\n return this.words\n }\n\n private processCharacter(index: number, character: string) {\n if (this.isGlobbing(index, 
character)) {\n return\n }\n\n switch (this.mode) {\n case Mode.Character:\n this.processTextCharacter(character)\n break\n case Mode.Tag:\n this.processHtmlTagContinuation(character)\n break\n case Mode.Whitespace:\n this.processWhiteSpaceContinuation(character)\n break\n case Mode.Entity:\n this.processEntityContinuation(character)\n break\n }\n }\n\n private processEntityContinuation(character: string) {\n if (Utils.isStartOfTag(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Tag\n } else if (character.trim().length === 0) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Whitespace\n } else if (Utils.isEndOfEntity(character)) {\n let switchToNextMode = true\n if (this.currentWordHasChars) {\n this.currentWord.push(character)\n this.words.push(this.currentWord.join(''))\n\n //join &nbsp; entity with last whitespace\n if (\n this.words.length > 2 &&\n Utils.isWhiteSpace(this.words[this.words.length - 2]) &&\n Utils.isWhiteSpace(this.words[this.words.length - 1])\n ) {\n const w1 = this.words[this.words.length - 2]\n const w2 = this.words[this.words.length - 1]\n this.words.splice(this.words.length - 2, 2)\n this.currentWord = `${w1}${w2}`.split('')\n this.mode = Mode.Whitespace\n switchToNextMode = false\n }\n }\n\n if (switchToNextMode) {\n this.currentWord = []\n this.mode = Mode.Character\n }\n } else if (Utils.isWord(character)) {\n this.currentWord.push(character)\n } else {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Character\n }\n }\n\n private processWhiteSpaceContinuation(character: string) {\n if (Utils.isStartOfTag(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Tag\n } else if (Utils.isStartOfEntity(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Entity\n } else if (Utils.isWhiteSpace(character)) {\n 
this.currentWord.push(character)\n } else {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Character\n }\n }\n\n private processHtmlTagContinuation(character: string) {\n if (Utils.isEndOfTag(character)) {\n this.currentWord.push(character)\n this.appendCurrentWordToWords()\n this.mode = Utils.isWhiteSpace(character) ? Mode.Whitespace : Mode.Character\n } else {\n this.currentWord.push(character)\n }\n }\n\n private processTextCharacter(character: string) {\n if (Utils.isStartOfTag(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push('<')\n this.mode = Mode.Tag\n } else if (Utils.isStartOfEntity(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Entity\n } else if (Utils.isWhiteSpace(character)) {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n this.mode = Mode.Whitespace\n } else if (\n Utils.isWord(character) &&\n (this.currentWord.length === 0 || Utils.isWord(this.currentWord[this.currentWord.length - 1]))\n ) {\n this.currentWord.push(character)\n } else {\n this.appendCurrentWordToWords()\n this.currentWord.push(character)\n }\n }\n\n private appendCurrentWordToWords() {\n if (this.currentWordHasChars) {\n this.words.push(this.currentWord.join(''))\n this.currentWord = []\n }\n }\n\n private isGlobbing(index: number, character: string): boolean {\n if (!this.isBlockCheckRequired) {\n return false\n }\n const isCurrentBlockTerminating = index === this.globbingUntil\n if (isCurrentBlockTerminating) {\n this.globbingUntil = WordSplitter.NotGlobbing\n this.isGrouping = false\n this.appendCurrentWordToWords()\n }\n\n const until = this.blockLocations.isInBlock(index)\n if (until) {\n this.isGrouping = true\n this.globbingUntil = until\n }\n if (this.isGrouping) {\n this.currentWord.push(character)\n this.mode = Mode.Character\n }\n return this.isGrouping\n }\n\n static convertHtmlToListOfWords(text: string, blockExpressions: 
RegExp[]): string[] {\n return new WordSplitter(text, blockExpressions).process()\n }\n}\n\nclass BlockFinderResult {\n private blocks: Map<number, number> = new Map()\n\n addBlock(from: number, to: number) {\n if (this.blocks.has(from)) {\n throw new ArgumentError('One or more block expressions result in a text sequence that overlaps.')\n }\n\n this.blocks.set(from, to)\n }\n\n isInBlock(location: number): number | null {\n return this.blocks.get(location) ?? null\n }\n\n get hasBlocks() {\n return this.blocks.size > 0\n }\n}\n\nclass ArgumentError extends Error {}\n\nclass BlockFinder {\n private text: string\n private blockExpressions: RegExp[]\n\n constructor(text: string, blockExpressions: RegExp[]) {\n this.text = text\n this.blockExpressions = blockExpressions\n }\n\n findBlocks(): BlockFinderResult {\n const result = new BlockFinderResult()\n for (const expression of this.blockExpressions) {\n this.processBlockMatcher(expression, result)\n }\n return result\n }\n\n private processBlockMatcher(exp: RegExp, result: BlockFinderResult) {\n let match: RegExpExecArray | null\n // biome-ignore lint/suspicious/noAssignInExpressions: Couldn't think of a nicer way to do this\n while ((match = exp.exec(this.text)) !== null) {\n this.tryAddBlock(exp, match, result)\n }\n }\n\n private tryAddBlock(exp: RegExp, match: RegExpExecArray, result: BlockFinderResult) {\n try {\n const from = match.index\n const to = match.index + match[0].length\n result.addBlock(from, to)\n } catch {\n throw new ArgumentError(\n `One or more block expressions result in a text sequence that overlaps. Current expression: ${exp}`\n )\n }\n }\n}\n","import Action from './Action'\nimport Match from './Match'\nimport MatchFinder from './MatchFinder'\nimport Operation from './Operation'\nimport Utils from './Utils'\nimport WordSplitter from './WordSplitter'\n\nexport default class HtmlDiff {\n /**\n * This value defines balance between speed and memory utilization. 
The higher it is the faster it works and more memory consumes.\n * @private\n */\n private static MatchGranularityMaximum = 4\n\n private static DelTag = 'del'\n private static InsTag = 'ins'\n\n // ignore case\n private static SpecialCaseClosingTags = [\n '</strong>',\n '</em>',\n '</b>',\n '</i>',\n '</big>',\n '</small>',\n '</u>',\n '</sub>',\n '</sup>',\n '</strike>',\n '</s>',\n '</span>',\n ]\n\n private static SpecialCaseClosingTagsSet = new Set([\n '</strong>',\n '</em>',\n '</b>',\n '</i>',\n '</big>',\n '</small>',\n '</u>',\n '</sub>',\n '</sup>',\n '</strike>',\n '</s>',\n '</span>',\n ])\n\n private static SpecialCaseOpeningTagRegex =\n /<((strong)|(b)|(i)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s)|(span))[>\\s]+/i\n\n private static FormattingTags = new Set([\n 'strong',\n 'em',\n 'b',\n 'i',\n 'big',\n 'small',\n 'u',\n 'sub',\n 'sup',\n 'strike',\n 's',\n 'span',\n ])\n\n private content: string[] = []\n private newText: string\n private oldText: string\n\n private specialTagDiffStack: string[] = []\n private newWords: string[] = []\n private oldWords: string[] = []\n /**\n * Content-only projections of oldWords/newWords (structural tags and adjacent whitespace removed).\n * When null, no structural normalization is applied (the word arrays are identical for diffing).\n */\n private oldContentWords: string[] | null = null\n private newContentWords: string[] | null = null\n /** Maps content-word index → original word index */\n private oldContentToOriginal: number[] | null = null\n private newContentToOriginal: number[] | null = null\n /**\n * Tracks the next unwritten word index in oldWords/newWords. Mutated only by\n * {@link sliceOriginalWordsForOp} (each op reads a slice and advances its cursor).\n * Advances monotonically. 
Used so:\n * - subsequent equal/delete ops know where in old to resume from\n * - subsequent insert ops know where in new to resume from\n * The two cursors are independent: equal/delete output from old and advance the old\n * cursor; insert outputs from new and advances the new cursor.\n */\n private lastOriginalOldOutputIndex = 0\n private lastOriginalNewOutputIndex = 0\n private matchGranularity = 0\n private blockExpressions: RegExp[] = []\n\n /**\n * Defines how to compare repeating words. Valid values are from 0 to 1.\n * This value allows to exclude some words from comparison that eventually\n * reduces the total time of the diff algorithm.\n * 0 means that all words are excluded so the diff will not find any matching words at all.\n * 1 (default value) means that all words participate in comparison so this is the most accurate case.\n * 0.5 means that any word that occurs more than 50% times may be excluded from comparison. This doesn't\n * mean that such words will definitely be excluded but only gives a permission to exclude them if necessary.\n */\n repeatingWordsAccuracy = 1.0\n\n /**\n * If true all whitespaces are considered as equal\n */\n ignoreWhitespaceDifferences = false\n\n /**\n * If some match is too small and located far from its neighbors then it is considered as orphan\n * and removed. 
For example:\n * <code>\n * aaaaa bb ccccccccc dddddd ee\n * 11111 bb 222222222 dddddd ee\n * </code>\n * will find two matches <code>bb</code> and <code>dddddd ee</code> but the first will be considered\n * as orphan and ignored, as result it will consider texts <code>aaaaa bb ccccccccc</code> and\n * <code>11111 bb 222222222</code> as single replacement:\n * <code>\n * &lt;del&gt;aaaaa bb ccccccccc&lt;/del&gt;&lt;ins&gt;11111 bb 222222222&lt;/ins&gt; dddddd ee\n * </code>\n * This property defines relative size of the match to be considered as orphan, from 0 to 1.\n * 1 means that all matches will be considered as orphans.\n * 0 (default) means that no match will be considered as orphan.\n * 0.2 means that if match length is less than 20% of distance between its neighbors it is considered as orphan.\n */\n orphanMatchThreshold = 0.0\n\n /**\n * Initializes a new instance of the class.\n * @param oldText The old text.\n * @param newText The new text.\n */\n constructor(oldText: string, newText: string) {\n this.oldText = oldText\n this.newText = newText\n }\n\n static execute(oldText: string, newText: string) {\n return new HtmlDiff(oldText, newText).build()\n }\n\n /**\n * Builds the HTML diff output\n * @return HTML diff markup\n */\n build(): string {\n // If there is no difference, don't bother checking for differences\n if (this.oldText === this.newText) {\n return this.newText\n }\n\n this.splitInputsToWords()\n this.buildContentProjections()\n\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n this.matchGranularity = Math.min(\n HtmlDiff.MatchGranularityMaximum,\n Math.min(wordsForDiffOld.length, wordsForDiffNew.length)\n )\n\n const operations = this.operations()\n for (const op of operations) {\n this.performOperation(op)\n }\n\n return this.content.join('')\n }\n\n /**\n * Uses {@link expression} to group text together so that any change detected within the group is treated as a single block\n * @param expression\n */\n addBlockExpression(expression: RegExp) {\n this.blockExpressions.push(expression)\n }\n\n private splitInputsToWords() {\n this.oldWords = WordSplitter.convertHtmlToListOfWords(this.oldText, this.blockExpressions)\n\n // free memory, allow it for GC\n this.oldText = ''\n\n this.newWords = WordSplitter.convertHtmlToListOfWords(this.newText, this.blockExpressions)\n\n // free memory, allow it for GC\n this.newText = ''\n }\n\n /**\n * Builds \"content projections\" — word arrays with structural wrapper tags stripped — when\n * structural normalization is appropriate for these inputs. The diff algorithm operates on\n * the projections so wrapper-tag differences (e.g. 
`<p>` vs `<div>`) don't appear as content\n * changes; structural tags are then folded back in at output time.\n */\n private buildContentProjections() {\n if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) return\n\n const oldProjection = HtmlDiff.createContentProjection(this.oldWords)\n const newProjection = HtmlDiff.createContentProjection(this.newWords)\n\n if (!HtmlDiff.shouldUseContentProjections(this.oldWords, this.newWords, oldProjection, newProjection)) {\n return\n }\n\n this.oldContentWords = oldProjection.contentWords\n this.oldContentToOriginal = oldProjection.contentToOriginal\n this.newContentWords = newProjection.contentWords\n this.newContentToOriginal = newProjection.contentToOriginal\n }\n\n /**\n * Decides whether structural normalization should be activated for this pair of inputs.\n * Each clause is a distinct correctness or fitness check — extend by adding a named\n * sub-predicate rather than chaining ad-hoc conditions.\n */\n private static shouldUseContentProjections(\n oldWords: string[],\n newWords: string[],\n oldProjection: { contentWords: string[]; contentToOriginal: number[] },\n newProjection: { contentWords: string[]; contentToOriginal: number[] }\n ): boolean {\n // One side has no content at all: that's a genuine addition/deletion, not a wrapper rename.\n // Normalization would mis-attribute the wrappers as part of the diff.\n if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) return false\n\n // Asymmetric structural state: one side has no structural wrappers at all (e.g. plain text\n // vs. wrapped HTML). Normalization would force the equal output to use the unwrapped side's\n // (missing) structure and emit dangling closing tags from the wrapped side. 
The plain\n // word-level diff handles this correctly without normalization.\n const oldHasStructuralTags = oldProjection.contentWords.length < oldWords.length\n const newHasStructuralTags = newProjection.contentWords.length < newWords.length\n if (oldHasStructuralTags !== newHasStructuralTags) return false\n\n return true\n }\n\n /**\n * Tags that commonly serve as content wrappers and may change structurally\n * without affecting the actual content. Only these tags are stripped during\n * structural normalization.\n */\n private static WrapperTags = new Set(['div', 'p', 'section', 'article', 'main', 'header', 'footer', 'aside', 'nav'])\n\n private static isStructuralTag(word: string): boolean {\n if (!Utils.isTag(word)) return false\n const tagName = Utils.getTagName(word)\n return HtmlDiff.WrapperTags.has(tagName)\n }\n\n /** True when the word is a structural opening tag (e.g. `<p>`, `<div>`). */\n private static isOpeningStructuralTag(word: string): boolean {\n return HtmlDiff.isStructuralTag(word) && !word.startsWith('</')\n }\n\n /**\n * Returns true if words between structural tags are just whitespace (indentation).\n */\n private static isStructuralWhitespace(words: string[], index: number): boolean {\n if (!Utils.isWhiteSpace(words[index])) return false\n\n // Check if this whitespace is adjacent to a structural tag on either side\n const prevIsStructural = index === 0 || HtmlDiff.isStructuralTag(words[index - 1])\n const nextIsStructural = index === words.length - 1 || HtmlDiff.isStructuralTag(words[index + 1])\n return prevIsStructural || nextIsStructural\n }\n\n private static createContentProjection(words: string[]): {\n contentWords: string[]\n contentToOriginal: number[]\n } {\n const contentWords: string[] = []\n const contentToOriginal: number[] = []\n\n for (let i = 0; i < words.length; i++) {\n if (HtmlDiff.isStructuralTag(words[i])) continue\n if (HtmlDiff.isStructuralWhitespace(words, i)) continue\n contentWords.push(words[i])\n 
contentToOriginal.push(i)\n }\n\n return { contentWords, contentToOriginal }\n }\n\n private static hasStructuralDifferences(oldWords: string[], newWords: string[]): boolean {\n const oldStructural: string[] = []\n const newStructural: string[] = []\n\n // Compare only tag names (stripped of attributes) since structural normalization\n // is about wrapper tag name changes (e.g. <p> vs <div>), not attribute differences.\n // Attribute changes on the same tag name don't need projection-based normalization.\n for (const w of oldWords) {\n if (HtmlDiff.isStructuralTag(w)) {\n oldStructural.push(Utils.stripTagAttributes(w))\n }\n }\n for (const w of newWords) {\n if (HtmlDiff.isStructuralTag(w)) {\n newStructural.push(Utils.stripTagAttributes(w))\n }\n }\n\n if (oldStructural.length !== newStructural.length) return true\n for (let i = 0; i < oldStructural.length; i++) {\n if (oldStructural[i] !== newStructural[i]) return true\n }\n return false\n }\n\n private performOperation(operation: Operation) {\n switch (operation.action) {\n case Action.Equal:\n this.processEqualOperation(operation)\n break\n case Action.Delete:\n this.processDeleteOperation(operation, 'diffdel')\n break\n case Action.Insert:\n this.processInsertOperation(operation, 'diffins')\n break\n case Action.None:\n break\n case Action.Replace:\n this.processReplaceOperation(operation)\n break\n }\n }\n\n private processReplaceOperation(operation: Operation) {\n this.processDeleteOperation(operation, 'diffmod')\n this.processInsertOperation(operation, 'diffmod')\n }\n\n private processInsertOperation(operation: Operation, cssClass: string) {\n const words = this.usingContentProjections()\n ? 
this.sliceOriginalWordsForOp('new', operation.startInNew, operation.endInNew)\n : this.newWords.slice(operation.startInNew, operation.endInNew)\n this.insertTag(HtmlDiff.InsTag, cssClass, words)\n }\n\n private processDeleteOperation(operation: Operation, cssClass: string) {\n const words = this.usingContentProjections()\n ? this.sliceOriginalWordsForOp('old', operation.startInOld, operation.endInOld)\n : this.oldWords.slice(operation.startInOld, operation.endInOld)\n this.insertTag(HtmlDiff.DelTag, cssClass, words)\n }\n\n private processEqualOperation(operation: Operation) {\n if (this.usingContentProjections()) {\n // Output from old to preserve old's HTML structure for the matched content.\n const result = this.sliceOriginalWordsForOp('old', operation.startInOld, operation.endInOld)\n this.content.push(result.join(''))\n\n // Advance new-side tracking past the equivalent range in new so the next insert op\n // resumes from the correct position. We compute new's range with the same rule used\n // for old (rather than mirroring old's count) so the two sides are independently sound\n // when their structural tags don't perfectly parallel each other.\n this.sliceOriginalWordsForOp('new', operation.startInNew, operation.endInNew)\n } else {\n const result = this.newWords.slice(operation.startInNew, operation.endInNew)\n this.content.push(result.join(''))\n }\n }\n\n /** True when content projections are active for both sides — i.e. structural normalization is in effect. */\n private usingContentProjections(): boolean {\n return this.oldContentToOriginal !== null && this.newContentToOriginal !== null\n }\n\n /**\n * Returns the slice of original (old or new) words covering a content-index range,\n * including the structural tags that surround the content. 
Advances the side's cursor\n * past the slice so the next op resumes correctly.\n *\n * The slice extends:\n * - LEADING: from the side's cursor (or the first content word's original index,\n * whichever is smaller) so structural tags that precede the first content word\n * are picked up by this op rather than left orphaned.\n * - TRAILING (non-last range): from just after the last content word, including\n * closing structural tags that close *this* op's paragraphs, but stopping at\n * the first opening structural tag — that opening tag belongs to the next\n * op's paragraph and would otherwise be emitted twice.\n * - TRAILING (last range): all the way to the end of words, since there is no next\n * op to claim the trailing tags.\n */\n private sliceOriginalWordsForOp(side: 'old' | 'new', contentStart: number, contentEnd: number): string[] {\n const words = side === 'old' ? this.oldWords : this.newWords\n const contentToOriginal = side === 'old' ? this.oldContentToOriginal : this.newContentToOriginal\n\n if (!contentToOriginal) return words.slice(contentStart, contentEnd)\n if (contentStart >= contentEnd) return []\n\n const firstContentOrigIdx = contentToOriginal[contentStart]\n const lastContentOrigIdx = contentToOriginal[contentEnd - 1]\n const cursor = side === 'old' ? 
this.lastOriginalOldOutputIndex : this.lastOriginalNewOutputIndex\n const origStart = Math.min(cursor, firstContentOrigIdx)\n\n let origEnd: number\n if (contentEnd < contentToOriginal.length) {\n // Non-last range: walk trailing tags after the last content word, stopping at the\n // first opening structural tag so it can be emitted by the next op.\n const limit = contentToOriginal[contentEnd]\n origEnd = lastContentOrigIdx + 1\n while (origEnd < limit && !HtmlDiff.isOpeningStructuralTag(words[origEnd])) {\n origEnd++\n }\n } else {\n // Last range: include everything to the end.\n origEnd = words.length\n }\n\n if (side === 'old') {\n this.lastOriginalOldOutputIndex = origEnd\n } else {\n this.lastOriginalNewOutputIndex = origEnd\n }\n\n return words.slice(origStart, origEnd)\n }\n\n /**\n * This method encloses words within a specified tag (ins or del), and adds this into \"content\",\n * with a twist: if there are words contain tags, it actually creates multiple ins or del,\n * so that they don't include any ins or del. This handles cases like\n * old: '<p>a</p>'\n * new: '<p>ab</p>\n * <p>\n * c</b>'\n * diff result: '<p>a<ins>b</ins></p>\n * <p>\n * <ins>c</ins>\n * </p>\n * '\n * this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or\n * del tags), but handles correctly more cases than the earlier version.\n * P.S.: Spare a thought for people who write HTML browsers. They live in this ... 
every day.\n * @param tag\n * @param cssClass\n * @param words\n * @private\n */\n private insertTag(tag: string, cssClass: string, words: string[]) {\n while (true) {\n if (words.length === 0) {\n break\n }\n\n const allWordsUntilFirstTag = this.extractConsecutiveWords(words, x => !Utils.isTag(x))\n if (allWordsUntilFirstTag.length > 0) {\n const text = Utils.wrapText(allWordsUntilFirstTag.join(''), tag, cssClass)\n this.content.push(text)\n }\n\n const isInsertOpCompleted = words.length === 0\n if (isInsertOpCompleted) {\n break\n }\n\n // if there are still words left, they must start with a tag, but still can contain nonTag entries.\n // e.g. </span></big>bar\n // the remaining words need to be handled separately divided in a tagBlock, which definitely contains\n // at least one word and a potentially existing second block which starts with a nonTag but may\n // contain tags later on.\n const indexOfFirstNonTag = words.findIndex(x => !Utils.isTag(x))\n\n // if there are no nonTags, the whole block is a tagBlock and the index of the last tag is the last index of the block.\n // if there are nonTags, the index of the last tag is the index before the first nonTag.\n const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? 
words.length - 1 : indexOfFirstNonTag - 1\n\n let specialCaseTagInjection = ''\n let specialCaseTagInjectionIsBefore = false\n\n // handle opening tag\n if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {\n const tagNames = new Set<string>()\n for (const word of words) {\n if (Utils.isTag(word)) {\n tagNames.add(Utils.getTagName(word))\n }\n }\n const styledTagNames = Array.from(tagNames).join(' ')\n\n this.specialTagDiffStack.push(words[0])\n specialCaseTagInjection = `<ins class='mod ${styledTagNames}'>`\n if (tag === HtmlDiff.DelTag) {\n words.shift()\n\n // following tags may be formatting tags as well, follow through\n while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {\n words.shift()\n }\n }\n }\n // handle closing tag\n else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {\n const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop()\n // For delete operations: when the tag block contains a mix of formatting and\n // non-formatting closing tags (e.g. </strong></div>), compare against the first\n // closing tag (the formatting one) rather than the last tag in the block.\n // For purely formatting tag blocks (e.g. 
</i></strong>) or insert operations,\n // use the last tag as before to match against the outermost opening tag.\n let tagIndexToCompare = indexLastTagInFirstTagBlock\n if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {\n const hasNonFormattingClosingTag = words\n .slice(0, indexLastTagInFirstTagBlock + 1)\n .some(w => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))\n if (hasNonFormattingClosingTag) {\n tagIndexToCompare = 0\n }\n }\n const openingAndClosingTagsMatch =\n !!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])\n\n if (!!openingTag && openingAndClosingTagsMatch) {\n specialCaseTagInjection = '</ins>'\n specialCaseTagInjectionIsBefore = true\n }\n\n // if the tag has a corresponding opening tag, but they don't match,\n // we need to push the opening tag back onto the stack\n else if (openingTag) {\n this.specialTagDiffStack.push(openingTag)\n }\n\n if (tag === HtmlDiff.DelTag) {\n words.shift()\n // following tags may be formatting tags as well, follow through\n while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {\n words.shift()\n }\n }\n }\n\n if (words.length === 0 && specialCaseTagInjection.length === 0) {\n break\n }\n\n // For delete operations, only extract non-formatting tags. Formatting tags (special case\n // opening/closing tags) need to be handled in the next loop iteration so they go through\n // the proper specialTagDiffStack logic. Otherwise, opening formatting tags get output as\n // plain tags and their corresponding closing tags are later discarded, producing invalid HTML.\n const isTagForExtraction =\n tag === HtmlDiff.DelTag\n ? 
(x: string) =>\n Utils.isTag(x) &&\n !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) &&\n !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase())\n : Utils.isTag\n\n if (specialCaseTagInjectionIsBefore) {\n this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(''))\n } else {\n this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join('') + specialCaseTagInjection)\n }\n\n if (words.length === 0) continue\n\n // if there are still words left, they must start with a nonTag and need to be handled in the next iteration.\n this.insertTag(tag, cssClass, words)\n break\n }\n }\n\n private extractConsecutiveWords(words: string[], condition: (character: string) => boolean): string[] {\n let indexOfFirstTag: number | null = null\n for (let i = 0; i < words.length; i++) {\n const word = words[i]\n if (i === 0 && word === ' ') {\n words[i] = '&nbsp;'\n }\n if (!condition(word)) {\n indexOfFirstTag = i\n break\n }\n }\n\n if (indexOfFirstTag !== null) {\n const items = words.slice(0, indexOfFirstTag)\n if (indexOfFirstTag > 0) {\n words.splice(0, indexOfFirstTag)\n }\n return items\n }\n\n const items = words.slice(0)\n words.splice(0, words.length)\n return items\n }\n\n private operations(): Operation[] {\n let positionInOld = 0\n let positionInNew = 0\n const operations: Operation[] = []\n\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n const matches = this.matchingBlocks()\n matches.push(new Match(wordsForDiffOld.length, wordsForDiffNew.length, 0))\n\n //Remove orphans from matches.\n //If distance between left and right matches is 4 times longer than length of current match then it is considered as orphan\n const matchesWithoutOrphans = this.removeOrphans(matches)\n\n for (const match of matchesWithoutOrphans) {\n const matchStartsAtCurrentPositionInOld = positionInOld === match.startInOld\n const matchStartsAtCurrentPositionInNew = positionInNew === match.startInNew\n\n let action: Action\n\n if (!matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {\n action = Action.Replace\n } else if (matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {\n action = Action.Insert\n } else if (!matchStartsAtCurrentPositionInOld) {\n action = Action.Delete\n } // This occurs if the first few words are the same in both versions\n else {\n action = Action.None\n }\n\n if (action !== Action.None) {\n operations.push(new Operation(action, positionInOld, match.startInOld, positionInNew, match.startInNew))\n }\n\n if (match.size !== 0) {\n operations.push(new Operation(Action.Equal, match.startInOld, match.endInOld, match.startInNew, match.endInNew))\n }\n\n positionInOld = match.endInOld\n positionInNew = match.endInNew\n }\n\n return operations\n }\n\n private *removeOrphans(matches: Match[]) {\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n let prev: Match = new Match(0, 0, 0)\n let curr: Match | null = null\n\n for (const next of matches) {\n if (curr === null) {\n curr = next\n continue\n }\n\n if (\n (prev.endInOld === curr.startInOld && prev.endInNew === curr.startInNew) ||\n (curr.endInOld === next.startInOld && curr.endInNew === next.startInNew)\n ) {\n //if match has no diff on the left or on the right\n yield curr\n prev = curr\n curr = next\n continue\n }\n\n let oldDistanceInChars = 0\n for (let i = prev.endInOld; i < next.startInOld; i++) {\n oldDistanceInChars += wordsForDiffOld[i].length\n }\n let newDistanceInChars = 0\n for (let i = prev.endInNew; i < next.startInNew; i++) {\n newDistanceInChars += wordsForDiffNew[i].length\n }\n let currMatchLengthInChars = 0\n for (let i = curr.startInNew; i < curr.endInNew; i++) {\n currMatchLengthInChars += wordsForDiffNew[i].length\n }\n\n if (currMatchLengthInChars > Math.max(oldDistanceInChars, newDistanceInChars) * this.orphanMatchThreshold) {\n yield curr\n }\n\n prev = curr\n curr = next\n }\n\n if (curr !== null) {\n yield curr //assume that the last match is always vital\n }\n }\n\n private matchingBlocks(): Match[] {\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n const matchingBlocks: Match[] = []\n this.findMatchingBlocks(0, wordsForDiffOld.length, 0, wordsForDiffNew.length, matchingBlocks)\n return matchingBlocks\n }\n\n private findMatchingBlocks(\n startInOld: number,\n endInOld: number,\n startInNew: number,\n endInNew: number,\n matchingBlocks: Match[]\n ) {\n const match = this.findMatch(startInOld, endInOld, startInNew, endInNew)\n\n if (match !== null) {\n if (startInOld < match.startInOld && startInNew < match.startInNew) {\n this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks)\n }\n\n matchingBlocks.push(match)\n\n if (match.endInOld < endInOld && match.endInNew < endInNew) {\n this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks)\n }\n }\n }\n\n private findMatch(startInOld: number, endInOld: number, startInNew: number, endInNew: number): Match | null {\n const wordsForDiffOld = this.oldContentWords ?? this.oldWords\n const wordsForDiffNew = this.newContentWords ?? 
this.newWords\n\n // For large texts it is more likely that there is a Match of size bigger than maximum granularity.\n // If not then go down and try to find it with smaller granularity.\n for (let i = this.matchGranularity; i > 0; i--) {\n const options = {\n blockSize: i,\n repeatingWordsAccuracy: this.repeatingWordsAccuracy,\n ignoreWhitespaceDifferences: this.ignoreWhitespaceDifferences,\n }\n const finder = new MatchFinder(\n wordsForDiffOld,\n wordsForDiffNew,\n startInOld,\n endInOld,\n startInNew,\n endInNew,\n options\n )\n const match = finder.findMatch()\n if (match !== null) return match\n }\n return null\n }\n}\n"],"mappings":";AAAA,IAAqB,QAArB,MAA2B;CACzB;CACA;CACA;CAEA,YAAY,YAAoB,YAAoB,MAAc;AAChE,OAAK,cAAc;AACnB,OAAK,cAAc;AACnB,OAAK,QAAQ;;CAGf,IAAI,aAAa;AACf,SAAO,KAAK;;CAGd,IAAI,aAAa;AACf,SAAO,KAAK;;CAGd,IAAI,OAAO;AACT,SAAO,KAAK;;CAGd,IAAI,WAAW;AACb,SAAO,KAAK,cAAc,KAAK;;CAGjC,IAAI,WAAW;AACb,SAAO,KAAK,cAAc,KAAK;;;;;AC5BnC,MAAM,kBAAkB;AACxB,MAAM,qBAAqB;AAC3B,MAAM,eAAe;AACrB,MAAM,kBAAkB;AACxB,MAAM,YAAY;AAClB,MAAM,WAAW;AAEjB,MAAM,sBAAyC,CAAC,OAAO;AAEvD,SAAgB,MAAM,MAAuB;AAC3C,KAAI,oBAAoB,MAAK,OAAM,MAAM,WAAW,GAAG,CAAC,CACtD,QAAO;AAGT,QAAO,aAAa,KAAK,IAAI,aAAa,KAAK;;AAGjD,SAAS,aAAa,MAAuB;AAC3C,QAAO,gBAAgB,KAAK,KAAK;;AAGnC,SAAS,aAAa,MAAuB;AAC3C,QAAO,mBAAmB,KAAK,KAAK;;AAGtC,SAAgB,mBAAmB,MAAsB;CACvD,MAAM,QAAQ,aAAa,KAAK,KAAK;AACrC,KAAI,MACF,QAAO,GAAG,MAAM,KAAK,KAAK,SAAS,KAAK,GAAG,OAAO;AAGpD,QAAO;;AAGT,SAAgB,SAAS,MAAc,SAAiB,UAA0B;AAChF,QAAO,IAAI,QAAQ,UAAU,SAAS,IAAI,KAAK,IAAI,QAAQ;;AAG7D,SAAgB,aAAa,KAAsB;AACjD,QAAO,QAAQ;;AAGjB,SAAgB,WAAW,KAAsB;AAC/C,QAAO,QAAQ;;AAGjB,SAAgB,gBAAgB,KAAsB;AACpD,QAAO,QAAQ;;AAGjB,SAAgB,cAAc,KAAsB;AAClD,QAAO,QAAQ;;AAGjB,SAAgB,aAAa,OAAwB;AACnD,QAAO,gBAAgB,KAAK,MAAM;;AAGpC,SAAgB,mBAAmB,MAAsB;AACvD,KAAI,MAAM,KAAK,CACb,QAAO,mBAAmB,KAAK;AAGjC,QAAO;;AAGT,SAAgB,OAAO,MAAuB;AAC5C,QAAO,UAAU,KAAK,KAAK;;AAG7B,SAAgB,WAAW,MAA6B;AACtD,KAAI,SAAS,KACX,QAAO;CAGT,MAAM,QAAQ,SAAS,KAAK,KAAK;AACjC,KAAI,MACF,QAAO,MAAM,QAAQ,KAAK,aAAa,IAAI,MAAM,GAAG,aAAa;AAGnE,QA
AO;;AAGT,IAAA,gBAAe;CACb;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;;;;;;ACxFD,IAAqB,cAArB,MAAqB,YAAY;CAC/B;CACA;CACA;CACA;CACA;CACA;CACA,cAAoD,EAAE;CACtD;CAEA,YACE,UACA,UACA,YACA,UACA,YACA,UACA,SACA;AACA,OAAK,WAAW;AAChB,OAAK,WAAW;AAChB,OAAK,aAAa;AAClB,OAAK,WAAW;AAChB,OAAK,aAAa;AAClB,OAAK,WAAW;AAChB,OAAK,UAAU;;CAGjB,gBAAwB;AACtB,OAAK,cAAc,EAAE;EACrB,MAAM,QAAkB,EAAE;AAC1B,OAAK,IAAI,IAAI,KAAK,YAAY,IAAI,KAAK,UAAU,KAAK;GAEpD,MAAM,OAAO,KAAK,kBAAkB,KAAK,SAAS,GAAG;GACrD,MAAM,MAAM,YAAY,WAAW,OAAO,MAAM,KAAK,QAAQ,UAAU;AAEvE,OAAI,QAAQ,KACV;AAGF,OAAI,CAAC,KAAK,YAAY,KACpB,MAAK,YAAY,OAAO,EAAE;AAE5B,QAAK,YAAY,KAAK,KAAK,EAAE;;;CAIjC,OAAe,WAAW,OAAiB,MAAc,WAAkC;AACzF,QAAM,KAAK,KAAK;AAEhB,MAAI,MAAM,SAAS,UACjB,OAAM,OAAO;AAGf,MAAI,MAAM,WAAW,UACnB,QAAO;AAGT,SAAO,MAAM,KAAK,GAAG;;CAGvB,kBAA0B,MAAsB;EAC9C,MAAM,SAASA,cAAM,mBAAmB,KAAK;AAC7C,MAAI,KAAK,QAAQ,+BAA+BA,cAAM,aAAa,OAAO,CACxE,QAAO;AAGT,SAAO;;CAGT,YAA0B;AACxB,OAAK,eAAe;AACpB,OAAK,sBAAsB;EAE3B,IAAI,aAAa;AACjB,OAAK,MAAM,QAAQ,KAAK,aAAa;AACnC,gBAAa;AACb;;AAEF,MAAI,CAAC,WACH,QAAO;EAGT,IAAI,iBAAiB,KAAK;EAC1B,IAAI,iBAAiB,KAAK;EAC1B,IAAI,gBAAgB;EAEpB,IAAI,gCAAqC,IAAI,KAAK;EAClD,MAAM,QAAkB,EAAE;AAE1B,OAAK,IAAI,aAAa,KAAK,YAAY,aAAa,KAAK,UAAU,cAAc;GAC/E,MAAM,OAAO,KAAK,kBAAkB,KAAK,SAAS,YAAY;GAC9D,MAAM,QAAQ,YAAY,WAAW,OAAO,MAAM,KAAK,QAAQ,UAAU;AAEzE,OAAI,UAAU,KACZ;GAGF,MAAM,mCAAwC,IAAI,KAAK;AAEvD,OAAI,CAAC,KAAK,YAAY,QAAQ;AAC5B,oBAAgB;AAChB;;AAGF,QAAK,MAAM,cAAc,KAAK,YAAY,QAAQ;IAEhD,MAAM,kBAAkB,cAAc,IAAI,aAAa,EAAE,GAAG,cAAc,IAAI,aAAa,EAAE,GAAI,KAAK;AACtG,qBAAiB,IAAI,YAAY,eAAe;AAEhD,QAAI,iBAAiB,eAAe;AAClC,sBAAiB,aAAa,iBAAiB,KAAK,QAAQ,YAAY;AACxE,sBAAiB,aAAa,iBAAiB,KAAK,QAAQ,YAAY;AACxE,qBAAgB;;;AAIpB,mBAAgB;;AAGlB,SAAO,kBAAkB,IACrB,IAAI,MAAM,gBAAgB,gBAAgB,gBAAgB,KAAK,QAAQ,YAAY,EAAE,GACrF;;;;;;;;CASN,uBAA+B;EAC7B,MAAM,YAAY,KAAK,SAAS,SAAS,KAAK,QAAQ;EACtD,MAAM,iBAAiB,OAAO,QAAQ,KAAK,YAAY,CACpD,QAAQ,GAAG,aAAa,QAAQ,SAAS,UAAU,CACnD,KAAK,CAAC,UAAU,KAAK;AAExB,OAAK,MAAM,KAAK,eACd,QAAO,KAAK,YAAY;;;;;AC/I9B,IAAqB,YAArB,MAA+B;CAC7B;CACA;CACA;CACA;CACA;CAEA,Y
AAY,QAAgB,YAAoB,UAAkB,YAAoB,UAAkB;AACtG,OAAK,SAAS;AACd,OAAK,aAAa;AAClB,OAAK,WAAW;AAChB,OAAK,aAAa;AAClB,OAAK,WAAW;;;;;ACXpB,IAAqB,eAArB,MAAqB,aAAa;CAChC;CACA;CACA;CACA;CACA,aAAqB;CACrB;CACA;CACA;CACA,OAAe,cAAc;CAE7B,IAAY,sBAAsB;AAChC,SAAO,KAAK,YAAY,SAAS;;CAGnC,YAAY,MAAc,kBAA4B;AACpD,OAAK,OAAO;AACZ,OAAK,iBAAiB,IAAI,YAAY,MAAM,iBAAiB,CAAC,YAAY;AAC1E,OAAK,uBAAuB,KAAK,eAAe;AAChD,OAAK,OAAA;AACL,OAAK,gBAAgB,aAAa;AAClC,OAAK,cAAc,EAAE;AACrB,OAAK,QAAQ,EAAE;;CAGjB,UAAoB;AAClB,OAAK,IAAI,QAAQ,GAAG,QAAQ,KAAK,KAAK,QAAQ,SAAS;GACrD,MAAM,YAAY,KAAK,KAAK,OAAO,MAAM;AACzC,QAAK,iBAAiB,OAAO,UAAU;;AAGzC,OAAK,0BAA0B;AAC/B,SAAO,KAAK;;CAGd,iBAAyB,OAAe,WAAmB;AACzD,MAAI,KAAK,WAAW,OAAO,UAAU,CACnC;AAGF,UAAQ,KAAK,MAAb;GACE,KAAA;AACE,SAAK,qBAAqB,UAAU;AACpC;GACF,KAAA;AACE,SAAK,2BAA2B,UAAU;AAC1C;GACF,KAAA;AACE,SAAK,8BAA8B,UAAU;AAC7C;GACF,KAAA;AACE,SAAK,0BAA0B,UAAU;AACzC;;;CAIN,0BAAkC,WAAmB;AACnD,MAAIC,cAAM,aAAa,UAAU,EAAE;AACjC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACI,UAAU,MAAM,CAAC,WAAW,GAAG;AACxC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,cAAc,UAAU,EAAE;GACzC,IAAI,mBAAmB;AACvB,OAAI,KAAK,qBAAqB;AAC5B,SAAK,YAAY,KAAK,UAAU;AAChC,SAAK,MAAM,KAAK,KAAK,YAAY,KAAK,GAAG,CAAC;AAG1C,QACE,KAAK,MAAM,SAAS,KACpBA,cAAM,aAAa,KAAK,MAAM,KAAK,MAAM,SAAS,GAAG,IACrDA,cAAM,aAAa,KAAK,MAAM,KAAK,MAAM,SAAS,GAAG,EACrD;KACA,MAAM,KAAK,KAAK,MAAM,KAAK,MAAM,SAAS;KAC1C,MAAM,KAAK,KAAK,MAAM,KAAK,MAAM,SAAS;AAC1C,UAAK,MAAM,OAAO,KAAK,MAAM,SAAS,GAAG,EAAE;AAC3C,UAAK,cAAc,GAAG,KAAK,KAAK,MAAM,GAAG;AACzC,UAAK,OAAA;AACL,wBAAmB;;;AAIvB,OAAI,kBAAkB;AACpB,SAAK,cAAc,EAAE;AACrB,SAAK,OAAA;;aAEEA,cAAM,OAAO,UAAU,CAChC,MAAK,YAAY,KAAK,UAAU;OAC3B;AACL,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;;;CAIT,8BAAsC,WAAmB;AACvD,MAAIA,cAAM,aAAa,UAAU,EAAE;AACjC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,gBAAgB,UAAU,EAAE;AAC3C,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,aAAa,UAAU,CACtC,MAAK,YAAY,KAAK,UAAU;OAC3B;AACL,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;;;CAIT,2B
AAmC,WAAmB;AACpD,MAAIA,cAAM,WAAW,UAAU,EAAE;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,0BAA0B;AAC/B,QAAK,OAAOA,cAAM,aAAa,UAAU,GAAA,IAAA;QAEzC,MAAK,YAAY,KAAK,UAAU;;CAIpC,qBAA6B,WAAmB;AAC9C,MAAIA,cAAM,aAAa,UAAU,EAAE;AACjC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,IAAI;AAC1B,QAAK,OAAA;aACIA,cAAM,gBAAgB,UAAU,EAAE;AAC3C,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aACIA,cAAM,aAAa,UAAU,EAAE;AACxC,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;aAELA,cAAM,OAAO,UAAU,KACtB,KAAK,YAAY,WAAW,KAAKA,cAAM,OAAO,KAAK,YAAY,KAAK,YAAY,SAAS,GAAG,EAE7F,MAAK,YAAY,KAAK,UAAU;OAC3B;AACL,QAAK,0BAA0B;AAC/B,QAAK,YAAY,KAAK,UAAU;;;CAIpC,2BAAmC;AACjC,MAAI,KAAK,qBAAqB;AAC5B,QAAK,MAAM,KAAK,KAAK,YAAY,KAAK,GAAG,CAAC;AAC1C,QAAK,cAAc,EAAE;;;CAIzB,WAAmB,OAAe,WAA4B;AAC5D,MAAI,CAAC,KAAK,qBACR,QAAO;AAGT,MADkC,UAAU,KAAK,eAClB;AAC7B,QAAK,gBAAgB,aAAa;AAClC,QAAK,aAAa;AAClB,QAAK,0BAA0B;;EAGjC,MAAM,QAAQ,KAAK,eAAe,UAAU,MAAM;AAClD,MAAI,OAAO;AACT,QAAK,aAAa;AAClB,QAAK,gBAAgB;;AAEvB,MAAI,KAAK,YAAY;AACnB,QAAK,YAAY,KAAK,UAAU;AAChC,QAAK,OAAA;;AAEP,SAAO,KAAK;;CAGd,OAAO,yBAAyB,MAAc,kBAAsC;AAClF,SAAO,IAAI,aAAa,MAAM,iBAAiB,CAAC,SAAS;;;AAI7D,IAAM,oBAAN,MAAwB;CACtB,yBAAsC,IAAI,KAAK;CAE/C,SAAS,MAAc,IAAY;AACjC,MAAI,KAAK,OAAO,IAAI,KAAK,CACvB,OAAM,IAAI,cAAc,yEAAyE;AAGnG,OAAK,OAAO,IAAI,MAAM,GAAG;;CAG3B,UAAU,UAAiC;AACzC,SAAO,KAAK,OAAO,IAAI,SAAS,IAAI;;CAGtC,IAAI,YAAY;AACd,SAAO,KAAK,OAAO,OAAO;;;AAI9B,IAAM,gBAAN,cAA4B,MAAM;AAElC,IAAM,cAAN,MAAkB;CAChB;CACA;CAEA,YAAY,MAAc,kBAA4B;AACpD,OAAK,OAAO;AACZ,OAAK,mBAAmB;;CAG1B,aAAgC;EAC9B,MAAM,SAAS,IAAI,mBAAmB;AACtC,OAAK,MAAM,cAAc,KAAK,iBAC5B,MAAK,oBAAoB,YAAY,OAAO;AAE9C,SAAO;;CAGT,oBAA4B,KAAa,QAA2B;EAClE,IAAI;AAEJ,UAAQ,QAAQ,IAAI,KAAK,KAAK,KAAK,MAAM,KACvC,MAAK,YAAY,KAAK,OAAO,OAAO;;CAIxC,YAAoB,KAAa,OAAwB,QAA2B;AAClF,MAAI;GACF,MAAM,OAAO,MAAM;GACnB,MAAM,KAAK,MAAM,QAAQ,MAAM,GAAG;AAClC,UAAO,SAAS,MAAM,GAAG;UACnB;AACN,SAAM,IAAI,cACR,8FAA8F,MAC/F;;;;;;AC7OP,IAAqB,WAArB,MAAqB,SAAS;;;;;CAK5B,OAAe,0BAA0B;CAEzC,OAAe,SAAS;CACxB,OAAe,SAAS;CAGxB,OAAe,yBAAyB;EACtC;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;E
ACA;EACD;CAED,OAAe,4BAA4B,IAAI,IAAI;EACjD;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACD,CAAC;CAEF,OAAe,6BACb;CAEF,OAAe,iBAAiB,IAAI,IAAI;EACtC;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACD,CAAC;CAEF,UAA4B,EAAE;CAC9B;CACA;CAEA,sBAAwC,EAAE;CAC1C,WAA6B,EAAE;CAC/B,WAA6B,EAAE;;;;;CAK/B,kBAA2C;CAC3C,kBAA2C;;CAE3C,uBAAgD;CAChD,uBAAgD;;;;;;;;;;CAUhD,6BAAqC;CACrC,6BAAqC;CACrC,mBAA2B;CAC3B,mBAAqC,EAAE;;;;;;;;;;CAWvC,yBAAyB;;;;CAKzB,8BAA8B;;;;;;;;;;;;;;;;;;;CAoB9B,uBAAuB;;;;;;CAOvB,YAAY,SAAiB,SAAiB;AAC5C,OAAK,UAAU;AACf,OAAK,UAAU;;CAGjB,OAAO,QAAQ,SAAiB,SAAiB;AAC/C,SAAO,IAAI,SAAS,SAAS,QAAQ,CAAC,OAAO;;;;;;CAO/C,QAAgB;AAEd,MAAI,KAAK,YAAY,KAAK,QACxB,QAAO,KAAK;AAGd,OAAK,oBAAoB;AACzB,OAAK,yBAAyB;EAE9B,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;AAErD,OAAK,mBAAmB,KAAK,IAC3B,SAAS,yBACT,KAAK,IAAI,gBAAgB,QAAQ,gBAAgB,OAAO,CACzD;EAED,MAAM,aAAa,KAAK,YAAY;AACpC,OAAK,MAAM,MAAM,WACf,MAAK,iBAAiB,GAAG;AAG3B,SAAO,KAAK,QAAQ,KAAK,GAAG;;;;;;CAO9B,mBAAmB,YAAoB;AACrC,OAAK,iBAAiB,KAAK,WAAW;;CAGxC,qBAA6B;AAC3B,OAAK,WAAW,aAAa,yBAAyB,KAAK,SAAS,KAAK,iBAAiB;AAG1F,OAAK,UAAU;AAEf,OAAK,WAAW,aAAa,yBAAyB,KAAK,SAAS,KAAK,iBAAiB;AAG1F,OAAK,UAAU;;;;;;;;CASjB,0BAAkC;AAChC,MAAI,CAAC,SAAS,yBAAyB,KAAK,UAAU,KAAK,SAAS,CAAE;EAEtE,MAAM,gBAAgB,SAAS,wBAAwB,KAAK,SAAS;EACrE,MAAM,gBAAgB,SAAS,wBAAwB,KAAK,SAAS;AAErE,MAAI,CAAC,SAAS,4BAA4B,KAAK,UAAU,KAAK,UAAU,eAAe,cAAc,CACnG;AAGF,OAAK,kBAAkB,cAAc;AACrC,OAAK,uBAAuB,cAAc;AAC1C,OAAK,kBAAkB,cAAc;AACrC,OAAK,uBAAuB,cAAc;;;;;;;CAQ5C,OAAe,4BACb,UACA,UACA,eACA,eACS;AAGT,MAAI,cAAc,aAAa,WAAW,KAAK,cAAc,aAAa,WAAW,EAAG,QAAO;AAQ/F,MAF6B,cAAc,aAAa,SAAS,SAAS,WAC7C,cAAc,aAAa,SAAS,SAAS,OACvB,QAAO;AAE1D,SAAO;;;;;;;CAQT,OAAe,cAAc,IAAI,IAAI;EAAC;EAAO;EAAK;EAAW;EAAW;EAAQ;EAAU;EAAU;EAAS;EAAM,CAAC;CAEpH,OAAe,gBAAgB,MAAuB;AACpD,MAAI,CAACC,cAAM,MAAM,KAAK,CAAE,QAAO;EAC/B,MAAM,UAAUA,cAAM,WAAW,KAAK;AACtC,SAAO,SAAS,YAAY,IAAI,QAAQ;;;CAI1C,OAAe,uBAAuB,MAAuB;AAC3D,SAAO,SAAS,gBAAgB,KAAK,IAAI,CAAC,KAAK,WAAW,KAAK;;;;;CAMjE,OAAe,uBAAuB,OAAiB,OAAwB;AAC7E,MAAI,CAACA,cAAM,aAAa
,MAAM,OAAO,CAAE,QAAO;EAG9C,MAAM,mBAAmB,UAAU,KAAK,SAAS,gBAAgB,MAAM,QAAQ,GAAG;EAClF,MAAM,mBAAmB,UAAU,MAAM,SAAS,KAAK,SAAS,gBAAgB,MAAM,QAAQ,GAAG;AACjG,SAAO,oBAAoB;;CAG7B,OAAe,wBAAwB,OAGrC;EACA,MAAM,eAAyB,EAAE;EACjC,MAAM,oBAA8B,EAAE;AAEtC,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,OAAI,SAAS,gBAAgB,MAAM,GAAG,CAAE;AACxC,OAAI,SAAS,uBAAuB,OAAO,EAAE,CAAE;AAC/C,gBAAa,KAAK,MAAM,GAAG;AAC3B,qBAAkB,KAAK,EAAE;;AAG3B,SAAO;GAAE;GAAc;GAAmB;;CAG5C,OAAe,yBAAyB,UAAoB,UAA6B;EACvF,MAAM,gBAA0B,EAAE;EAClC,MAAM,gBAA0B,EAAE;AAKlC,OAAK,MAAM,KAAK,SACd,KAAI,SAAS,gBAAgB,EAAE,CAC7B,eAAc,KAAKA,cAAM,mBAAmB,EAAE,CAAC;AAGnD,OAAK,MAAM,KAAK,SACd,KAAI,SAAS,gBAAgB,EAAE,CAC7B,eAAc,KAAKA,cAAM,mBAAmB,EAAE,CAAC;AAInD,MAAI,cAAc,WAAW,cAAc,OAAQ,QAAO;AAC1D,OAAK,IAAI,IAAI,GAAG,IAAI,cAAc,QAAQ,IACxC,KAAI,cAAc,OAAO,cAAc,GAAI,QAAO;AAEpD,SAAO;;CAGT,iBAAyB,WAAsB;AAC7C,UAAQ,UAAU,QAAlB;GACE,KAAA;AACE,SAAK,sBAAsB,UAAU;AACrC;GACF,KAAA;AACE,SAAK,uBAAuB,WAAW,UAAU;AACjD;GACF,KAAA;AACE,SAAK,uBAAuB,WAAW,UAAU;AACjD;GACF,KAAA,EACE;GACF,KAAA;AACE,SAAK,wBAAwB,UAAU;AACvC;;;CAIN,wBAAgC,WAAsB;AACpD,OAAK,uBAAuB,WAAW,UAAU;AACjD,OAAK,uBAAuB,WAAW,UAAU;;CAGnD,uBAA+B,WAAsB,UAAkB;EACrE,MAAM,QAAQ,KAAK,yBAAyB,GACxC,KAAK,wBAAwB,OAAO,UAAU,YAAY,UAAU,SAAS,GAC7E,KAAK,SAAS,MAAM,UAAU,YAAY,UAAU,SAAS;AACjE,OAAK,UAAU,SAAS,QAAQ,UAAU,MAAM;;CAGlD,uBAA+B,WAAsB,UAAkB;EACrE,MAAM,QAAQ,KAAK,yBAAyB,GACxC,KAAK,wBAAwB,OAAO,UAAU,YAAY,UAAU,SAAS,GAC7E,KAAK,SAAS,MAAM,UAAU,YAAY,UAAU,SAAS;AACjE,OAAK,UAAU,SAAS,QAAQ,UAAU,MAAM;;CAGlD,sBAA8B,WAAsB;AAClD,MAAI,KAAK,yBAAyB,EAAE;GAElC,MAAM,SAAS,KAAK,wBAAwB,OAAO,UAAU,YAAY,UAAU,SAAS;AAC5F,QAAK,QAAQ,KAAK,OAAO,KAAK,GAAG,CAAC;AAMlC,QAAK,wBAAwB,OAAO,UAAU,YAAY,UAAU,SAAS;SACxE;GACL,MAAM,SAAS,KAAK,SAAS,MAAM,UAAU,YAAY,UAAU,SAAS;AAC5E,QAAK,QAAQ,KAAK,OAAO,KAAK,GAAG,CAAC;;;;CAKtC,0BAA2C;AACzC,SAAO,KAAK,yBAAyB,QAAQ,KAAK,yBAAyB;;;;;;;;;;;;;;;;;;CAmB7E,wBAAgC,MAAqB,cAAsB,YAA8B;EACvG,MAAM,QAAQ,SAAS,QAAQ,KAAK,WAAW,KAAK;EACpD,MAAM,oBAAoB,SAAS,QAAQ,KAAK,uBAAuB,KAAK;AAE5E,MAAI,CAAC,kBAAmB,QAAO,MAAM,MAAM,cAAc,WAAW;AACpE,MAAI,gBAAgB,WAAY,QAAO,EAAE;E
AEzC,MAAM,sBAAsB,kBAAkB;EAC9C,MAAM,qBAAqB,kBAAkB,aAAa;EAC1D,MAAM,SAAS,SAAS,QAAQ,KAAK,6BAA6B,KAAK;EACvE,MAAM,YAAY,KAAK,IAAI,QAAQ,oBAAoB;EAEvD,IAAI;AACJ,MAAI,aAAa,kBAAkB,QAAQ;GAGzC,MAAM,QAAQ,kBAAkB;AAChC,aAAU,qBAAqB;AAC/B,UAAO,UAAU,SAAS,CAAC,SAAS,uBAAuB,MAAM,SAAS,CACxE;QAIF,WAAU,MAAM;AAGlB,MAAI,SAAS,MACX,MAAK,6BAA6B;MAElC,MAAK,6BAA6B;AAGpC,SAAO,MAAM,MAAM,WAAW,QAAQ;;;;;;;;;;;;;;;;;;;;;;;CAwBxC,UAAkB,KAAa,UAAkB,OAAiB;AAChE,SAAO,MAAM;AACX,OAAI,MAAM,WAAW,EACnB;GAGF,MAAM,wBAAwB,KAAK,wBAAwB,QAAO,MAAK,CAACA,cAAM,MAAM,EAAE,CAAC;AACvF,OAAI,sBAAsB,SAAS,GAAG;IACpC,MAAM,OAAOA,cAAM,SAAS,sBAAsB,KAAK,GAAG,EAAE,KAAK,SAAS;AAC1E,SAAK,QAAQ,KAAK,KAAK;;AAIzB,OAD4B,MAAM,WAAW,EAE3C;GAQF,MAAM,qBAAqB,MAAM,WAAU,MAAK,CAACA,cAAM,MAAM,EAAE,CAAC;GAIhE,MAAM,8BAA8B,uBAAuB,KAAK,MAAM,SAAS,IAAI,qBAAqB;GAExG,IAAI,0BAA0B;GAC9B,IAAI,kCAAkC;AAGtC,OAAI,SAAS,2BAA2B,KAAK,MAAM,GAAG,EAAE;IACtD,MAAM,2BAAW,IAAI,KAAa;AAClC,SAAK,MAAM,QAAQ,MACjB,KAAIA,cAAM,MAAM,KAAK,CACnB,UAAS,IAAIA,cAAM,WAAW,KAAK,CAAC;IAGxC,MAAM,iBAAiB,MAAM,KAAK,SAAS,CAAC,KAAK,IAAI;AAErD,SAAK,oBAAoB,KAAK,MAAM,GAAG;AACvC,8BAA0B,mBAAmB,eAAe;AAC5D,QAAI,QAAQ,SAAS,QAAQ;AAC3B,WAAM,OAAO;AAGb,YAAO,MAAM,SAAS,KAAK,SAAS,2BAA2B,KAAK,MAAM,GAAG,CAC3E,OAAM,OAAO;;cAKV,SAAS,0BAA0B,IAAI,MAAM,GAAG,aAAa,CAAC,EAAE;IACvE,MAAM,aAAa,KAAK,oBAAoB,WAAW,IAAI,OAAO,KAAK,oBAAoB,KAAK;IAMhG,IAAI,oBAAoB;AACxB,QAAI,QAAQ,SAAS,UAAU,uBAAuB;SACjB,MAChC,MAAM,GAAG,8BAA8B,EAAE,CACzC,MAAK,MAAK,CAAC,SAAS,0BAA0B,IAAI,EAAE,aAAa,CAAC,CAAC,CAEpE,qBAAoB;;IAGxB,MAAM,6BACJ,CAAC,CAAC,cAAcA,cAAM,WAAW,WAAW,KAAKA,cAAM,WAAW,MAAM,mBAAmB;AAE7F,QAAI,CAAC,CAAC,cAAc,4BAA4B;AAC9C,+BAA0B;AAC1B,uCAAkC;eAK3B,WACP,MAAK,oBAAoB,KAAK,WAAW;AAG3C,QAAI,QAAQ,SAAS,QAAQ;AAC3B,WAAM,OAAO;AAEb,YAAO,MAAM,SAAS,KAAK,SAAS,0BAA0B,IAAI,MAAM,GAAG,aAAa,CAAC,CACvF,OAAM,OAAO;;;AAKnB,OAAI,MAAM,WAAW,KAAK,wBAAwB,WAAW,EAC3D;GAOF,MAAM,qBACJ,QAAQ,SAAS,UACZ,MACCA,cAAM,MAAM,EAAE,IACd,CAAC,SAAS,2BAA2B,KAAK,EAAE,IAC5C,CAAC,SAAS,0BAA0B,IAAI,EAAE,aAAa,CAAC,GAC1DA,cAAM;AAEZ,OAAI,gCACF,MAAK,QAAQ,KAAK,0BAA0B,KAAK,wBAAwB,OAAO,mBAAmB,CAAC,KAAK,GAAG,CA
AC;OAE7G,MAAK,QAAQ,KAAK,KAAK,wBAAwB,OAAO,mBAAmB,CAAC,KAAK,GAAG,GAAG,wBAAwB;AAG/G,OAAI,MAAM,WAAW,EAAG;AAGxB,QAAK,UAAU,KAAK,UAAU,MAAM;AACpC;;;CAIJ,wBAAgC,OAAiB,WAAqD;EACpG,IAAI,kBAAiC;AACrC,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,OAAO,MAAM;AACnB,OAAI,MAAM,KAAK,SAAS,IACtB,OAAM,KAAK;AAEb,OAAI,CAAC,UAAU,KAAK,EAAE;AACpB,sBAAkB;AAClB;;;AAIJ,MAAI,oBAAoB,MAAM;GAC5B,MAAM,QAAQ,MAAM,MAAM,GAAG,gBAAgB;AAC7C,OAAI,kBAAkB,EACpB,OAAM,OAAO,GAAG,gBAAgB;AAElC,UAAO;;EAGT,MAAM,QAAQ,MAAM,MAAM,EAAE;AAC5B,QAAM,OAAO,GAAG,MAAM,OAAO;AAC7B,SAAO;;CAGT,aAAkC;EAChC,IAAI,gBAAgB;EACpB,IAAI,gBAAgB;EACpB,MAAM,aAA0B,EAAE;EAElC,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EAErD,MAAM,UAAU,KAAK,gBAAgB;AACrC,UAAQ,KAAK,IAAI,MAAM,gBAAgB,QAAQ,gBAAgB,QAAQ,EAAE,CAAC;EAI1E,MAAM,wBAAwB,KAAK,cAAc,QAAQ;AAEzD,OAAK,MAAM,SAAS,uBAAuB;GACzC,MAAM,oCAAoC,kBAAkB,MAAM;GAClE,MAAM,oCAAoC,kBAAkB,MAAM;GAElE,IAAI;AAEJ,OAAI,CAAC,qCAAqC,CAAC,kCACzC,UAAA;YACS,qCAAqC,CAAC,kCAC/C,UAAA;YACS,CAAC,kCACV,UAAA;OAGA,UAAA;AAGF,OAAI,WAAA,EACF,YAAW,KAAK,IAAI,UAAU,QAAQ,eAAe,MAAM,YAAY,eAAe,MAAM,WAAW,CAAC;AAG1G,OAAI,MAAM,SAAS,EACjB,YAAW,KAAK,IAAI,UAAA,GAAwB,MAAM,YAAY,MAAM,UAAU,MAAM,YAAY,MAAM,SAAS,CAAC;AAGlH,mBAAgB,MAAM;AACtB,mBAAgB,MAAM;;AAGxB,SAAO;;CAGT,CAAS,cAAc,SAAkB;EACvC,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EAErD,IAAI,OAAc,IAAI,MAAM,GAAG,GAAG,EAAE;EACpC,IAAI,OAAqB;AAEzB,OAAK,MAAM,QAAQ,SAAS;AAC1B,OAAI,SAAS,MAAM;AACjB,WAAO;AACP;;AAGF,OACG,KAAK,aAAa,KAAK,cAAc,KAAK,aAAa,KAAK,cAC5D,KAAK,aAAa,KAAK,cAAc,KAAK,aAAa,KAAK,YAC7D;AAEA,UAAM;AACN,WAAO;AACP,WAAO;AACP;;GAGF,IAAI,qBAAqB;AACzB,QAAK,IAAI,IAAI,KAAK,UAAU,IAAI,KAAK,YAAY,IAC/C,uBAAsB,gBAAgB,GAAG;GAE3C,IAAI,qBAAqB;AACzB,QAAK,IAAI,IAAI,KAAK,UAAU,IAAI,KAAK,YAAY,IAC/C,uBAAsB,gBAAgB,GAAG;GAE3C,IAAI,yBAAyB;AAC7B,QAAK,IAAI,IAAI,KAAK,YAAY,IAAI,KAAK,UAAU,IAC/C,2BAA0B,gBAAgB,GAAG;AAG/C,OAAI,yBAAyB,KAAK,IAAI,oBAAoB,mBAAmB,GAAG,KAAK,qBACnF,OAAM;AAGR,UAAO;AACP,UAAO;;AAGT,MAAI,SAAS,KACX,OAAM;;CAIV,iBAAkC;EAChC,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB
,KAAK,mBAAmB,KAAK;EACrD,MAAM,iBAA0B,EAAE;AAClC,OAAK,mBAAmB,GAAG,gBAAgB,QAAQ,GAAG,gBAAgB,QAAQ,eAAe;AAC7F,SAAO;;CAGT,mBACE,YACA,UACA,YACA,UACA,gBACA;EACA,MAAM,QAAQ,KAAK,UAAU,YAAY,UAAU,YAAY,SAAS;AAExE,MAAI,UAAU,MAAM;AAClB,OAAI,aAAa,MAAM,cAAc,aAAa,MAAM,WACtD,MAAK,mBAAmB,YAAY,MAAM,YAAY,YAAY,MAAM,YAAY,eAAe;AAGrG,kBAAe,KAAK,MAAM;AAE1B,OAAI,MAAM,WAAW,YAAY,MAAM,WAAW,SAChD,MAAK,mBAAmB,MAAM,UAAU,UAAU,MAAM,UAAU,UAAU,eAAe;;;CAKjG,UAAkB,YAAoB,UAAkB,YAAoB,UAAgC;EAC1G,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;EACrD,MAAM,kBAAkB,KAAK,mBAAmB,KAAK;AAIrD,OAAK,IAAI,IAAI,KAAK,kBAAkB,IAAI,GAAG,KAAK;GAe9C,MAAM,QATS,IAAI,YACjB,iBACA,iBACA,YACA,UACA,YACA,UAXc;IACd,WAAW;IACX,wBAAwB,KAAK;IAC7B,6BAA6B,KAAK;IACnC,CASA,CACoB,WAAW;AAChC,OAAI,UAAU,KAAM,QAAO;;AAE7B,SAAO"}
@@ -26,8 +26,17 @@ declare class HtmlDiff {
26
26
  /** Maps content-word index → original word index */
27
27
  private oldContentToOriginal;
28
28
  private newContentToOriginal;
29
- /** Tracks the last original old word index output, so equal operations can include leading structural tags */
29
+ /**
30
+ * Tracks the next unwritten word index in oldWords/newWords. Mutated only by
31
+ * {@link sliceOriginalWordsForOp} (each op reads a slice and advances its cursor).
32
+ * Advances monotonically. Used so:
33
+ * - subsequent equal/delete ops know where in old to resume from
34
+ * - subsequent insert ops know where in new to resume from
35
+ * The two cursors are independent: equal/delete output from old and advance the old
36
+ * cursor; insert outputs from new and advances the new cursor.
37
+ */
30
38
  private lastOriginalOldOutputIndex;
39
+ private lastOriginalNewOutputIndex;
31
40
  private matchGranularity;
32
41
  private blockExpressions;
33
42
  /**
@@ -82,11 +91,18 @@ declare class HtmlDiff {
82
91
  addBlockExpression(expression: RegExp): void;
83
92
  private splitInputsToWords;
84
93
  /**
85
- * Checks whether the two word arrays have structural HTML differences (different non-formatting tags
86
- * or different whitespace between structural tags). When they do, builds "content projections" that
87
- * strip structural noise so the diff algorithm only sees meaningful content and formatting changes.
94
+ * Builds "content projections" (word arrays with structural wrapper tags stripped) when
95
+ * structural normalization is appropriate for these inputs. The diff algorithm operates on
96
+ * the projections so wrapper-tag differences (e.g. `<p>` vs `<div>`) don't appear as content
97
+ * changes; structural tags are then folded back in at output time.
88
98
  */
89
99
  private buildContentProjections;
100
+ /**
101
+ * Decides whether structural normalization should be activated for this pair of inputs.
102
+ * Each clause is a distinct correctness or fitness check — extend by adding a named
103
+ * sub-predicate rather than chaining ad-hoc conditions.
104
+ */
105
+ private static shouldUseContentProjections;
90
106
  /**
91
107
  * Tags that commonly serve as content wrappers and may change structurally
92
108
  * without affecting the actual content. Only these tags are stripped during
@@ -94,6 +110,8 @@ declare class HtmlDiff {
94
110
  */
95
111
  private static WrapperTags;
96
112
  private static isStructuralTag;
113
+ /** True when the word is a structural opening tag (e.g. `<p>`, `<div>`). */
114
+ private static isOpeningStructuralTag;
97
115
  /**
98
116
  * Returns true if words between structural tags are just whitespace (indentation).
99
117
  */
@@ -105,21 +123,25 @@ declare class HtmlDiff {
105
123
  private processInsertOperation;
106
124
  private processDeleteOperation;
107
125
  private processEqualOperation;
126
+ /** True when content projections are active for both sides — i.e. structural normalization is in effect. */
127
+ private usingContentProjections;
108
128
  /**
109
- * Gets original old words for a content-index range, including only content and formatting tags
110
- * (used for delete/replace operations where we don't want structural tags).
111
- */
112
- private getOriginalOldWords;
113
- /**
114
- * Gets original new words for a content-index range, including only content and formatting tags
115
- * (used for insert/replace operations where we don't want structural tags).
116
- */
117
- private getOriginalNewWords;
118
- /**
119
- * Gets original old words for a content-index range, INCLUDING structural tags and whitespace
120
- * between the content words (used for equal operations to preserve old HTML structure).
129
+ * Returns the slice of original (old or new) words covering a content-index range,
130
+ * including the structural tags that surround the content. Advances the side's cursor
131
+ * past the slice so the next op resumes correctly.
132
+ *
133
+ * The slice extends:
134
+ * - LEADING: from the side's cursor (or the first content word's original index,
135
+ * whichever is smaller) so structural tags that precede the first content word
136
+ * are picked up by this op rather than left orphaned.
137
+ * - TRAILING (non-last range): from just after the last content word, including
138
+ * closing structural tags that close *this* op's paragraphs, but stopping at
139
+ * the first opening structural tag — that opening tag belongs to the next
140
+ * op's paragraph and would otherwise be emitted twice.
141
+ * - TRAILING (last range): all the way to the end of words, since there is no next
142
+ * op to claim the trailing tags.
121
143
  */
122
- private getOriginalOldWordsWithStructure;
144
+ private sliceOriginalWordsForOp;
123
145
  /**
124
146
  * This method encloses words within a specified tag (ins or del), and adds this into "content",
125
147
  * with a twist: if there are words contain tags, it actually creates multiple ins or del,