@createiq/htmldiff 1.0.2 → 1.0.4-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,843 @@
1
+ //#region src/Match.ts
2
/**
 * A run of `size` consecutive words that is identical in the old and the new
 * word list, starting at `startInOld` / `startInNew` respectively.
 */
const Match = class {
	_startInOld;
	_startInNew;
	_size;

	/**
	 * @param startInOld index of the first matching word in the old list
	 * @param startInNew index of the first matching word in the new list
	 * @param size number of matching words
	 */
	constructor(startInOld, startInNew, size) {
		this._startInOld = startInOld;
		this._startInNew = startInNew;
		this._size = size;
	}

	/** Index of the first matching word in the old list. */
	get startInOld() {
		return this._startInOld;
	}

	/** Index of the first matching word in the new list. */
	get startInNew() {
		return this._startInNew;
	}

	/** Number of words covered by the match. */
	get size() {
		return this._size;
	}

	/** Exclusive end index of the match in the old list. */
	get endInOld() {
		return this.startInOld + this.size;
	}

	/** Exclusive end index of the match in the new list. */
	get endInNew() {
		return this.startInNew + this.size;
	}
};
27
+ //#endregion
28
+ //#region src/Utils.ts
29
/** A word that is exactly one tag (opening or closing), optionally padded with whitespace. */
const openingTagRegex = /^\s*<[^>]+>\s*$/;
/** A word that is exactly one closing tag, optionally padded with whitespace. */
const closingTagTexRegex = /^\s*<\/[^>]+>\s*$/;
/**
 * The `<name` / `</name` prefix of a tag. The name stops at whitespace, `/` or `>`
 * so that the slash of a self-closing tag (`<br/>`) is not swallowed into the name.
 */
const tagWordRegex = /<\/?[^\s/>]+/;
/** One or more whitespace characters and/or `&nbsp;` entities, nothing else. */
const whitespaceRegex = /^(\s|&nbsp;)+$/;
/** At least one word character, `#` or `@` anywhere in the text. */
const wordRegex = /[\w#@]+/;
/** Any tag; the named group `name` captures the tag name. */
const tagRegex = /<\/?(?<name>[^\s/>]+)[^>]*>/;
/** Tag prefixes that are deliberately treated as words rather than as tags. */
const SpecialCaseWordTags = ["<img"];

/**
 * Tells whether `item` is a single HTML tag. Items starting with one of
 * {@link SpecialCaseWordTags} are reported as non-tags.
 */
function isTag(item) {
	if (SpecialCaseWordTags.some((prefix) => item?.startsWith(prefix))) return false;
	return isOpeningTag(item) || isClosingTag(item);
}

function isOpeningTag(item) {
	return openingTagRegex.test(item);
}

function isClosingTag(item) {
	return closingTagTexRegex.test(item);
}

/**
 * Reduces a tag to its bare form without attributes, e.g. `<a href="x">` → `<a>`,
 * `</p>` → `</p>`, `<br />` and `<br/>` → `<br/>`. Text without a tag prefix is
 * returned unchanged.
 */
function stripTagAttributes(word) {
	const match = tagWordRegex.exec(word);
	if (!match) return word;
	const closing = word.endsWith("/>") ? "/>" : ">";
	return `${match[0]}${closing}`;
}

/** Wraps `text` in `<tagName class='cssClass'>…</tagName>`. Nothing is escaped. */
function wrapText(text, tagName, cssClass) {
	return `<${tagName} class='${cssClass}'>${text}</${tagName}>`;
}

function isStartOfTag(val) {
	return val === "<";
}

function isEndOfTag(val) {
	return val === ">";
}

function isStartOfEntity(val) {
	return val === "&";
}

function isEndOfEntity(val) {
	return val === ";";
}

/** True when `value` consists only of whitespace characters and/or `&nbsp;` entities. */
function isWhiteSpace(value) {
	return whitespaceRegex.test(value);
}

/** Strips attributes when `word` is a tag; returns any other word unchanged. */
function stripAnyAttributes(word) {
	return isTag(word) ? stripTagAttributes(word) : word;
}

/** True when `text` contains at least one word character, `#` or `@`. */
function isWord(text) {
	return wordRegex.test(text);
}

/**
 * Returns the lower-cased name of the first tag found in `word`, or `""` when
 * `word` is null/undefined or contains no tag.
 */
function getTagName(word) {
	if (word == null) return "";
	const match = tagRegex.exec(word);
	if (!match) return "";
	return (match.groups?.name ?? match[1]).toLowerCase();
}

const Utils_default = {
	isTag,
	stripTagAttributes,
	wrapText,
	isStartOfTag,
	isEndOfTag,
	isStartOfEntity,
	isEndOfEntity,
	isWhiteSpace,
	stripAnyAttributes,
	isWord,
	getTagName
};
95
+ //#endregion
96
+ //#region src/MatchFinder.ts
97
/**
 * Finds the longest match in given texts. It uses indexing with fixed granularity that is used to compare blocks of text.
 *
 * The new words are indexed by "block key": the concatenation of `options.blockSize`
 * consecutive normalized words. The old words are then scanned once, extending
 * runs of consecutive block hits to find the longest common run.
 */
const MatchFinder = class MatchFinder {
	oldWords;
	newWords;
	startInOld;
	endInOld;
	startInNew;
	endInNew;
	/**
	 * Block key → positions (index of the block's LAST word in `newWords`), ascending.
	 * A Map is used because keys come straight from the document text: with a plain
	 * object, words such as "constructor" or "__proto__" would resolve to inherited
	 * Object.prototype members and crash the indexer.
	 */
	wordIndices = new Map();
	options;

	/**
	 * @param oldWords words of the old text
	 * @param newWords words of the new text
	 * @param startInOld first index (inclusive) of the range searched in `oldWords`
	 * @param endInOld end index (exclusive) of the range searched in `oldWords`
	 * @param startInNew first index (inclusive) of the range searched in `newWords`
	 * @param endInNew end index (exclusive) of the range searched in `newWords`
	 * @param options `{ blockSize, repeatingWordsAccuracy, ignoreWhitespaceDifferences }`
	 */
	constructor(oldWords, newWords, startInOld, endInOld, startInNew, endInNew, options) {
		this.oldWords = oldWords;
		this.newWords = newWords;
		this.startInOld = startInOld;
		this.endInOld = endInOld;
		this.startInNew = startInNew;
		this.endInNew = endInNew;
		this.options = options;
	}

	/** Rebuilds {@link wordIndices} for the searched range of the new words. */
	indexNewWords() {
		this.wordIndices = new Map();
		const block = [];
		for (let i = this.startInNew; i < this.endInNew; i++) {
			const word = this.normalizeForIndex(this.newWords[i]);
			const key = MatchFinder.putNewWord(block, word, this.options.blockSize);
			if (key === null) continue;
			const positions = this.wordIndices.get(key);
			if (positions === undefined) this.wordIndices.set(key, [i]);
			else positions.push(i);
		}
	}

	/**
	 * Appends `word` to the sliding window `block` (mutated in place, capped at
	 * `blockSize` entries) and returns the window's key, or `null` while the
	 * window is not yet full.
	 */
	static putNewWord(block, word, blockSize) {
		block.push(word);
		if (block.length > blockSize) block.shift();
		if (block.length !== blockSize) return null;
		return block.join("");
	}

	/**
	 * Normalizes a word for indexing: tag attributes are stripped and, when
	 * `ignoreWhitespaceDifferences` is set, any whitespace word becomes a single space.
	 */
	normalizeForIndex(word) {
		const output = Utils_default.stripAnyAttributes(word);
		if (this.options.ignoreWhitespaceDifferences && Utils_default.isWhiteSpace(output)) return " ";
		return output;
	}

	/**
	 * @returns the longest common run of words inside the configured ranges as a
	 * `Match`, or `null` when nothing matches at this block size.
	 */
	findMatch() {
		this.indexNewWords();
		this.removeRepeatingWords();
		if (this.wordIndices.size === 0) return null;

		const { blockSize } = this.options;
		let bestMatchInOld = this.startInOld;
		let bestMatchInNew = this.startInNew;
		let bestMatchSize = 0;
		// position in new → number of consecutive blocks that matched, ending at that position
		let matchLengthAt = new Map();
		const block = [];
		for (let indexInOld = this.startInOld; indexInOld < this.endInOld; indexInOld++) {
			const word = this.normalizeForIndex(this.oldWords[indexInOld]);
			const key = MatchFinder.putNewWord(block, word, blockSize);
			if (key === null) continue;
			const newMatchLengthAt = new Map();
			for (const indexInNew of this.wordIndices.get(key) ?? []) {
				// a hit extends the run that ended one position earlier in the new text
				const newMatchLength = (matchLengthAt.get(indexInNew - 1) ?? 0) + 1;
				newMatchLengthAt.set(indexInNew, newMatchLength);
				if (newMatchLength > bestMatchSize) {
					// indices point at the last word of the last block; step back to the first word of the run
					bestMatchInOld = indexInOld - newMatchLength - blockSize + 2;
					bestMatchInNew = indexInNew - newMatchLength - blockSize + 2;
					bestMatchSize = newMatchLength;
				}
			}
			matchLengthAt = newMatchLengthAt;
		}
		if (bestMatchSize === 0) return null;
		return new Match(bestMatchInOld, bestMatchInNew, bestMatchSize + blockSize - 1);
	}

	/**
	 * This method removes words that occur too many times. This way it reduces total count of comparison operations
	 * and as result the diff algorithm takes less time. But the side effect is that it may detect false differences of
	 * the repeating words.
	 * @private
	 */
	removeRepeatingWords() {
		const threshold = this.newWords.length * this.options.repeatingWordsAccuracy;
		// deleting the entry currently being visited is safe while iterating a Map
		for (const [key, positions] of this.wordIndices) {
			if (positions.length > threshold) this.wordIndices.delete(key);
		}
	}
};
188
+ //#endregion
189
+ //#region src/Operation.ts
190
/**
 * One step of the diff: an action code plus the half-open word ranges
 * `[startInOld, endInOld)` and `[startInNew, endInNew)` it applies to.
 */
const Operation = class {
	action;
	startInOld;
	endInOld;
	startInNew;
	endInNew;

	/**
	 * @param action numeric action code
	 * @param startInOld start index (inclusive) in the old words
	 * @param endInOld end index (exclusive) in the old words
	 * @param startInNew start index (inclusive) in the new words
	 * @param endInNew end index (exclusive) in the new words
	 */
	constructor(action, startInOld, endInOld, startInNew, endInNew) {
		Object.assign(this, { action, startInOld, endInOld, startInNew, endInNew });
	}
};
204
+ //#endregion
205
+ //#region src/WordSplitter.ts
206
/**
 * Splits an HTML string into "words": tags, entities, whitespace runs, runs of
 * word characters and single other characters. Text covered by one of the block
 * expressions is glued into a single word.
 *
 * `mode` selects the handler for the next character:
 * 0 → plain text, 1 → inside a tag, 2 → whitespace run, 3 → inside an entity.
 */
const WordSplitter = class WordSplitter {
	text;
	isBlockCheckRequired;
	blockLocations;
	mode;
	isGrouping = false;
	globbingUntil;
	currentWord;
	words;
	static NotGlobbing = -1;

	/** True while the word under construction holds at least one character. */
	get currentWordHasChars() {
		return this.currentWord.length > 0;
	}

	/**
	 * @param text HTML to split
	 * @param blockExpressions expressions handed to `BlockFinder`; their matches are kept together as one word
	 */
	constructor(text, blockExpressions) {
		this.text = text;
		this.blockLocations = new BlockFinder(text, blockExpressions).findBlocks();
		this.isBlockCheckRequired = this.blockLocations.hasBlocks;
		this.mode = 0;
		this.globbingUntil = WordSplitter.NotGlobbing;
		this.currentWord = [];
		this.words = [];
	}

	/** Feeds every character of the text through the state machine and returns the collected words. */
	process() {
		const { text } = this;
		for (let position = 0; position < text.length; position++) {
			this.processCharacter(position, text.charAt(position));
		}
		this.appendCurrentWordToWords();
		return this.words;
	}

	/** Routes one character either into the current block or to the handler of the current mode. */
	processCharacter(index, character) {
		if (this.isGlobbing(index, character)) return;
		// read the mode only now: isGlobbing may have reset it when a block ended
		const { mode } = this;
		if (mode === 0) this.processTextCharacter(character);
		else if (mode === 1) this.processHtmlTagContinuation(character);
		else if (mode === 2) this.processWhiteSpaceContinuation(character);
		else if (mode === 3) this.processEntityContinuation(character);
	}

	/** Finishes the pending word, starts a new one with `character` and switches to `nextMode`. */
	#startNewWord(character, nextMode) {
		this.appendCurrentWordToWords();
		this.currentWord.push(character);
		this.mode = nextMode;
	}

	/** Handles a character while an entity (`&…;`) is being collected. */
	processEntityContinuation(character) {
		if (Utils_default.isStartOfTag(character)) {
			this.#startNewWord(character, 1);
			return;
		}
		if (character.trim().length === 0) {
			this.#startNewWord(character, 2);
			return;
		}
		if (Utils_default.isEndOfEntity(character)) {
			let mergedIntoWhitespace = false;
			if (this.currentWordHasChars) {
				this.currentWord.push(character);
				this.words.push(this.currentWord.join(""));
				const count = this.words.length;
				const lastTwoAreWhitespace = count > 2 && Utils_default.isWhiteSpace(this.words[count - 2]) && Utils_default.isWhiteSpace(this.words[count - 1]);
				if (lastTwoAreWhitespace) {
					// a whitespace entity right after whitespace: fold both back into one pending whitespace word
					const [previous, entity] = this.words.splice(count - 2, 2);
					this.currentWord = `${previous}${entity}`.split("");
					this.mode = 2;
					mergedIntoWhitespace = true;
				}
			}
			if (!mergedIntoWhitespace) {
				this.currentWord = [];
				this.mode = 0;
			}
			return;
		}
		if (Utils_default.isWord(character)) {
			this.currentWord.push(character);
			return;
		}
		this.#startNewWord(character, 0);
	}

	/** Handles a character while a whitespace run is being collected. */
	processWhiteSpaceContinuation(character) {
		if (Utils_default.isStartOfTag(character)) {
			this.#startNewWord(character, 1);
			return;
		}
		if (Utils_default.isStartOfEntity(character)) {
			this.#startNewWord(character, 3);
			return;
		}
		if (Utils_default.isWhiteSpace(character)) {
			this.currentWord.push(character);
			return;
		}
		this.#startNewWord(character, 0);
	}

	/** Handles a character inside a tag; the tag word is finished by the end-of-tag character. */
	processHtmlTagContinuation(character) {
		const closesTag = Utils_default.isEndOfTag(character);
		this.currentWord.push(character);
		if (!closesTag) return;
		this.appendCurrentWordToWords();
		this.mode = Utils_default.isWhiteSpace(character) ? 2 : 0;
	}

	/** Handles a character in plain-text mode. */
	processTextCharacter(character) {
		if (Utils_default.isStartOfTag(character)) {
			this.#startNewWord("<", 1);
			return;
		}
		if (Utils_default.isStartOfEntity(character)) {
			this.#startNewWord(character, 3);
			return;
		}
		if (Utils_default.isWhiteSpace(character)) {
			this.#startNewWord(character, 2);
			return;
		}
		const pending = this.currentWord;
		const continuesWord = Utils_default.isWord(character) && (pending.length === 0 || Utils_default.isWord(pending[pending.length - 1]));
		if (!continuesWord) this.appendCurrentWordToWords();
		this.currentWord.push(character);
	}

	/** Moves the pending word, if any, to the result list. */
	appendCurrentWordToWords() {
		if (!this.currentWordHasChars) return;
		this.words.push(this.currentWord.join(""));
		this.currentWord = [];
	}

	/**
	 * Block handling for the character at `index`. Returns true when the character
	 * was consumed as part of a block (it is then appended to the pending word).
	 */
	isGlobbing(index, character) {
		if (!this.isBlockCheckRequired) return false;
		const blockEndsHere = index === this.globbingUntil;
		if (blockEndsHere) {
			this.globbingUntil = WordSplitter.NotGlobbing;
			this.isGrouping = false;
			this.appendCurrentWordToWords();
		}
		const blockEnd = this.blockLocations.isInBlock(index);
		if (blockEnd) {
			this.isGrouping = true;
			this.globbingUntil = blockEnd;
		}
		if (this.isGrouping) {
			this.currentWord.push(character);
			this.mode = 0;
		}
		return this.isGrouping;
	}

	/** Convenience wrapper: splits `text` in one call. */
	static convertHtmlToListOfWords(text, blockExpressions) {
		const splitter = new WordSplitter(text, blockExpressions);
		return splitter.process();
	}
};
357
/** Start index → exclusive end index of every block found in a text. */
const BlockFinderResult = class {
	blocks = new Map();

	/**
	 * Registers the block `[from, to)`.
	 * @throws ArgumentError when a block starting at `from` is already registered
	 */
	addBlock(from, to) {
		if (this.blocks.has(from)) throw new ArgumentError("One or more block expressions result in a text sequence that overlaps.");
		this.blocks.set(from, to);
	}

	/** Returns the exclusive end index of the block that STARTS at `location`, or `null`. */
	isInBlock(location) {
		return this.blocks.get(location) ?? null;
	}

	get hasBlocks() {
		return this.blocks.size > 0;
	}
};

/** Raised when the supplied block expressions cannot be applied to the text. */
const ArgumentError = class extends Error {};

/** Locates every match of the block expressions in a text. */
const BlockFinder = class {
	text;
	blockExpressions;

	/**
	 * @param text text to scan
	 * @param blockExpressions iterable of RegExp objects
	 */
	constructor(text, blockExpressions) {
		this.text = text;
		this.blockExpressions = blockExpressions;
	}

	/**
	 * @returns a BlockFinderResult holding the matches of all expressions
	 * @throws ArgumentError when two matches start at the same position
	 */
	findBlocks() {
		const result = new BlockFinderResult();
		for (const expression of this.blockExpressions) this.processBlockMatcher(expression, result);
		return result;
	}

	/**
	 * Adds every non-empty match of `exp` to `result`.
	 *
	 * The scan runs on a private global copy of the expression, so that
	 * - expressions written without the `g` flag are scanned to the end of the text
	 *   (an `exec` loop on a non-global regex re-finds the first match forever),
	 * - the caller's `lastIndex` is neither consulted nor modified.
	 * Zero-length matches are skipped: they cannot group any text.
	 */
	processBlockMatcher(exp, result) {
		const flags = exp.flags.includes("g") ? exp.flags : `${exp.flags}g`;
		const scanner = new RegExp(exp, flags);
		for (const match of this.text.matchAll(scanner)) {
			if (match[0].length === 0) continue;
			this.tryAddBlock(exp, match, result);
		}
	}

	/**
	 * Registers one match, naming the offending expression when registration fails.
	 * @throws ArgumentError with the underlying error attached as `cause`
	 */
	tryAddBlock(exp, match, result) {
		const from = match.index;
		const to = from + match[0].length;
		try {
			result.addBlock(from, to);
		} catch (cause) {
			throw new ArgumentError(`One or more block expressions result in a text sequence that overlaps. Current expression: ${exp}`, { cause });
		}
	}
};
397
+ //#endregion
398
+ //#region src/HtmlDiff.ts
399
/**
 * Computes a word-level diff of two HTML fragments and renders it as HTML in
 * which removed text is wrapped in `<del>` and added text in `<ins>`.
 *
 * Operation action codes used throughout the class (see {@link performOperation}):
 * 0 = equal, 1 = delete, 2 = insert, 3 = none, 4 = replace.
 */
const HtmlDiff = class HtmlDiff {
	/**
	 * This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
	 * @private
	 */
	static MatchGranularityMaximum = 4;
	static DelTag = "del";
	static InsTag = "ins";
	static SpecialCaseClosingTags = [
		"</strong>",
		"</em>",
		"</b>",
		"</i>",
		"</big>",
		"</small>",
		"</u>",
		"</sub>",
		"</sup>",
		"</strike>",
		"</s>",
		"</span>"
	];
	/** Same entries as {@link SpecialCaseClosingTags}, for constant-time lookup. */
	static SpecialCaseClosingTagsSet = new Set(HtmlDiff.SpecialCaseClosingTags);
	static SpecialCaseOpeningTagRegex = /<((strong)|(b)|(i)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s)|(span))[>\s]+/i;
	static FormattingTags = new Set([
		"strong",
		"em",
		"b",
		"i",
		"big",
		"small",
		"u",
		"sub",
		"sup",
		"strike",
		"s",
		"span"
	]);
	/**
	 * Tags that commonly serve as content wrappers and may change structurally
	 * without affecting the actual content. Only these tags are stripped during
	 * structural normalization.
	 */
	static WrapperTags = new Set([
		"div",
		"p",
		"section",
		"article",
		"main",
		"header",
		"footer",
		"aside",
		"nav"
	]);

	/** Output fragments; joined by {@link build}. */
	content = [];
	newText;
	oldText;
	/** Opening formatting tags for which a `<ins class='mod …'>` wrapper is currently open. */
	specialTagDiffStack = [];
	newWords = [];
	oldWords = [];
	/**
	 * Content-only projections of oldWords/newWords (structural tags and adjacent whitespace removed).
	 * When null, no structural normalization is applied (the word arrays are identical for diffing).
	 */
	oldContentWords = null;
	newContentWords = null;
	/** Maps content-word index → original word index */
	oldContentToOriginal = null;
	newContentToOriginal = null;
	matchGranularity = 0;
	blockExpressions = [];
	/**
	 * Defines how to compare repeating words. Valid values are from 0 to 1.
	 * This value allows to exclude some words from comparison that eventually
	 * reduces the total time of the diff algorithm.
	 * 0 means that all words are excluded so the diff will not find any matching words at all.
	 * 1 (default value) means that all words participate in comparison so this is the most accurate case.
	 * 0.5 means that any word that occurs more than 50% times may be excluded from comparison. This doesn't
	 * mean that such words will definitely be excluded but only gives a permission to exclude them if necessary.
	 */
	repeatingWordsAccuracy = 1;
	/**
	 * If true all whitespaces are considered as equal
	 */
	ignoreWhitespaceDifferences = false;
	/**
	 * If some match is too small and located far from its neighbors then it is considered as orphan
	 * and removed. For example:
	 * <code>
	 * aaaaa bb ccccccccc dddddd ee
	 * 11111 bb 222222222 dddddd ee
	 * </code>
	 * will find two matches <code>bb</code> and <code>dddddd ee</code> but the first will be considered
	 * as orphan and ignored, as result it will consider texts <code>aaaaa bb ccccccccc</code> and
	 * <code>11111 bb 222222222</code> as single replacement:
	 * <code>
	 * &lt;del&gt;aaaaa bb ccccccccc&lt;/del&gt;&lt;ins&gt;11111 bb 222222222&lt;/ins&gt; dddddd ee
	 * </code>
	 * This property defines relative size of the match to be considered as orphan, from 0 to 1.
	 * 1 means that all matches will be considered as orphans.
	 * 0 (default) means that no match will be considered as orphan.
	 * 0.2 means that if match length is less than 20% of distance between its neighbors it is considered as orphan.
	 */
	orphanMatchThreshold = 0;

	/**
	 * Initializes a new instance of the class.
	 * @param oldText The old text.
	 * @param newText The new text.
	 */
	constructor(oldText, newText) {
		this.oldText = oldText;
		this.newText = newText;
	}

	/** Diffs `oldText` against `newText` with default settings and returns the markup. */
	static execute(oldText, newText) {
		return new HtmlDiff(oldText, newText).build();
	}

	/**
	 * Builds the HTML diff output
	 * @return HTML diff markup
	 */
	build() {
		if (this.oldText === this.newText) return this.newText;
		this.splitInputsToWords();
		this.buildContentProjections();
		const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
		const wordsForDiffNew = this.newContentWords ?? this.newWords;
		this.matchGranularity = Math.min(HtmlDiff.MatchGranularityMaximum, wordsForDiffOld.length, wordsForDiffNew.length);
		for (const op of this.operations()) this.performOperation(op);
		return this.content.join("");
	}

	/**
	 * Uses {@link expression} to group text together so that any change detected within the group is treated as a single block
	 * @param expression
	 */
	addBlockExpression(expression) {
		this.blockExpressions.push(expression);
	}

	/** Splits both inputs into words and releases the input strings. */
	splitInputsToWords() {
		this.oldWords = WordSplitter.convertHtmlToListOfWords(this.oldText, this.blockExpressions);
		this.oldText = "";
		this.newWords = WordSplitter.convertHtmlToListOfWords(this.newText, this.blockExpressions);
		this.newText = "";
	}

	/**
	 * Checks whether the two word arrays have structural HTML differences (different non-formatting tags
	 * or different whitespace between structural tags). When they do, builds "content projections" that
	 * strip structural noise so the diff algorithm only sees meaningful content and formatting changes.
	 */
	buildContentProjections() {
		// the projections are only needed when the wrapper structure actually differs
		if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) return;
		const oldProjection = HtmlDiff.createContentProjection(this.oldWords);
		const newProjection = HtmlDiff.createContentProjection(this.newWords);
		this.oldContentWords = oldProjection.contentWords;
		this.oldContentToOriginal = oldProjection.contentToOriginal;
		this.newContentWords = newProjection.contentWords;
		this.newContentToOriginal = newProjection.contentToOriginal;
	}

	/** True when `word` is a tag whose name is listed in {@link WrapperTags}. */
	static isStructuralTag(word) {
		if (!Utils_default.isTag(word)) return false;
		return HtmlDiff.WrapperTags.has(Utils_default.getTagName(word));
	}

	/**
	 * Returns true if words between structural tags are just whitespace (indentation).
	 * A whitespace word qualifies when it is the first or last word or has a
	 * structural tag on at least one side.
	 */
	static isStructuralWhitespace(words, index) {
		if (!Utils_default.isWhiteSpace(words[index])) return false;
		const prevIsStructural = index === 0 || HtmlDiff.isStructuralTag(words[index - 1]);
		const nextIsStructural = index === words.length - 1 || HtmlDiff.isStructuralTag(words[index + 1]);
		return prevIsStructural || nextIsStructural;
	}

	/**
	 * Filters structural tags and structural whitespace out of `words`.
	 * @returns `{ contentWords, contentToOriginal }` where `contentToOriginal[i]`
	 * is the index in `words` of `contentWords[i]`
	 */
	static createContentProjection(words) {
		const contentWords = [];
		const contentToOriginal = [];
		words.forEach((word, i) => {
			if (HtmlDiff.isStructuralTag(word) || HtmlDiff.isStructuralWhitespace(words, i)) return;
			contentWords.push(word);
			contentToOriginal.push(i);
		});
		return {
			contentWords,
			contentToOriginal
		};
	}

	/** True when the sequences of structural tags (attributes ignored) of the two word lists differ. */
	static hasStructuralDifferences(oldWords, newWords) {
		const structuralTagsOf = (words) => words.filter((w) => HtmlDiff.isStructuralTag(w)).map((w) => Utils_default.stripTagAttributes(w));
		const oldStructural = structuralTagsOf(oldWords);
		const newStructural = structuralTagsOf(newWords);
		if (oldStructural.length !== newStructural.length) return true;
		return oldStructural.some((tag, i) => tag !== newStructural[i]);
	}

	/** Renders one operation into {@link content}. */
	performOperation(operation) {
		switch (operation.action) {
			case 0:
				this.processEqualOperation(operation);
				break;
			case 1:
				this.processDeleteOperation(operation, "diffdel");
				break;
			case 2:
				this.processInsertOperation(operation, "diffins");
				break;
			case 3: break;
			case 4:
				this.processReplaceOperation(operation);
				break;
		}
	}

	processReplaceOperation(operation) {
		this.processDeleteOperation(operation, "diffmod");
		this.processInsertOperation(operation, "diffmod");
	}

	processInsertOperation(operation, cssClass) {
		const words = this.oldContentWords ? this.getOriginalNewWords(operation.startInNew, operation.endInNew) : this.newWords.slice(operation.startInNew, operation.endInNew);
		this.insertTag(HtmlDiff.InsTag, cssClass, words);
	}

	processDeleteOperation(operation, cssClass) {
		const words = this.oldContentWords ? this.getOriginalOldWords(operation.startInOld, operation.endInOld) : this.oldWords.slice(operation.startInOld, operation.endInOld);
		this.insertTag(HtmlDiff.DelTag, cssClass, words);
	}

	processEqualOperation(operation) {
		const words = this.oldContentWords ? this.getOriginalOldWordsWithStructure(operation.startInOld, operation.endInOld) : this.newWords.slice(operation.startInNew, operation.endInNew);
		this.content.push(words.join(""));
	}

	/**
	 * Gets original old words for a content-index range, including only content and formatting tags
	 * (used for delete/replace operations where we don't want structural tags).
	 */
	getOriginalOldWords(contentStart, contentEnd) {
		if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd);
		return this.oldContentToOriginal.slice(contentStart, Math.max(contentStart, contentEnd)).map((i) => this.oldWords[i]);
	}

	/**
	 * Gets original new words for a content-index range, including only content and formatting tags
	 * (used for insert/replace operations where we don't want structural tags).
	 */
	getOriginalNewWords(contentStart, contentEnd) {
		if (!this.newContentToOriginal) return this.newWords.slice(contentStart, contentEnd);
		return this.newContentToOriginal.slice(contentStart, Math.max(contentStart, contentEnd)).map((i) => this.newWords[i]);
	}

	/**
	 * Gets original old words for a content-index range, INCLUDING structural tags and whitespace
	 * between the content words (used for equal operations to preserve old HTML structure).
	 *
	 * The returned range runs up to (not including) the content word that follows
	 * the range, so trailing structure is kept. A range that starts at the very first
	 * content word also takes the structure that precedes it; otherwise the opening
	 * wrapper of the document would be dropped while its closing tag is emitted.
	 * NOTE(review): structure that precedes the first content word is still lost when
	 * the first operation is not an equal operation — confirm whether that is intended.
	 */
	getOriginalOldWordsWithStructure(contentStart, contentEnd) {
		if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd);
		if (contentStart >= contentEnd) return [];
		const origStart = contentStart === 0 ? 0 : this.oldContentToOriginal[contentStart];
		const origEnd = contentEnd < this.oldContentToOriginal.length ? this.oldContentToOriginal[contentEnd] : this.oldWords.length;
		return this.oldWords.slice(origStart, origEnd);
	}

	/**
	 * This method encloses words within a specified tag (ins or del), and adds this into "content",
	 * with a twist: if the words contain tags, it actually creates multiple ins or del,
	 * so that they don't include any tags. For example inserting the words
	 * `a`, `<p>`, `b`, `</p>` produces
	 * `<ins>a</ins><p><ins>b</ins></p>`.
	 * This still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
	 * del tags), but handles correctly more cases than the earlier version.
	 *
	 * Formatting tags get special treatment: an opening formatting tag is followed by
	 * `<ins class='mod …'>` and the matching closing tag is preceded by `</ins>`; for
	 * deletions the formatting tags themselves are dropped.
	 *
	 * `words` is consumed (emptied) by this method. The work is done in a loop rather
	 * than by recursing once per text/tag alternation, so long inputs cannot exhaust
	 * the call stack.
	 * @param tag
	 * @param cssClass
	 * @param words
	 * @private
	 */
	insertTag(tag, cssClass, words) {
		while (words.length > 0) {
			const allWordsUntilFirstTag = this.extractConsecutiveWords(words, (x) => !Utils_default.isTag(x));
			if (allWordsUntilFirstTag.length > 0) {
				this.content.push(Utils_default.wrapText(allWordsUntilFirstTag.join(""), tag, cssClass));
			}
			if (words.length === 0) break;

			// words[0] is a tag from here on
			const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
			const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
			let specialCaseTagInjection = "";
			let specialCaseTagInjectionIsBefore = false;
			if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
				const tagNames = new Set();
				for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
				const styledTagNames = Array.from(tagNames).join(" ");
				this.specialTagDiffStack.push(words[0]);
				specialCaseTagInjection = `<ins class='mod ${styledTagNames}'>`;
				if (tag === HtmlDiff.DelTag) {
					words.shift();
					while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
				}
			} else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
				const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
				const openingAndClosingTagsMatch = !!openingTag && Utils_default.getTagName(openingTag) === Utils_default.getTagName(words[indexLastTagInFirstTagBlock]);
				if (openingAndClosingTagsMatch) {
					specialCaseTagInjection = "</ins>";
					specialCaseTagInjectionIsBefore = true;
				} else if (openingTag) this.specialTagDiffStack.push(openingTag);
				if (tag === HtmlDiff.DelTag) {
					words.shift();
					while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
				}
			}
			if (words.length === 0 && specialCaseTagInjection.length === 0) break;

			const tagRun = this.extractConsecutiveWords(words, Utils_default.isTag).join("");
			this.content.push(specialCaseTagInjectionIsBefore ? specialCaseTagInjection + tagRun : tagRun + specialCaseTagInjection);
		}
	}

	/**
	 * Removes and returns the leading words of `words` for which `condition` holds.
	 * Side effect kept from the original implementation: a first word that is exactly
	 * one space is replaced by `&nbsp;` in `words` (the condition still sees the space).
	 */
	extractConsecutiveWords(words, condition) {
		let count = 0;
		while (count < words.length) {
			const word = words[count];
			if (count === 0 && word === " ") words[0] = "&nbsp;";
			if (!condition(word)) break;
			count++;
		}
		return words.splice(0, count);
	}

	/**
	 * Converts the matching blocks into the full list of operations: every gap
	 * between two consecutive matches becomes a delete, insert or replace and
	 * every non-empty match becomes an equal operation.
	 */
	operations() {
		let positionInOld = 0;
		let positionInNew = 0;
		const operations = [];
		const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
		const wordsForDiffNew = this.newContentWords ?? this.newWords;
		const matches = this.matchingBlocks();
		// empty sentinel match at the very end so that trailing differences are emitted too
		matches.push(new Match(wordsForDiffOld.length, wordsForDiffNew.length, 0));
		const matchesWithoutOrphans = this.removeOrphans(matches);
		for (const match of matchesWithoutOrphans) {
			const matchStartsAtCurrentPositionInOld = positionInOld === match.startInOld;
			const matchStartsAtCurrentPositionInNew = positionInNew === match.startInNew;
			let action;
			if (!matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) action = 4;
			else if (matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) action = 2;
			else if (!matchStartsAtCurrentPositionInOld) action = 1;
			else action = 3;
			if (action !== 3) operations.push(new Operation(action, positionInOld, match.startInOld, positionInNew, match.startInNew));
			if (match.size !== 0) operations.push(new Operation(0, match.startInOld, match.endInOld, match.startInNew, match.endInNew));
			positionInOld = match.endInOld;
			positionInNew = match.endInNew;
		}
		return operations;
	}

	/**
	 * Yields the matches that are not orphans (see {@link orphanMatchThreshold}).
	 * A match directly adjacent to its predecessor or successor is always kept;
	 * the last match is always yielded.
	 */
	*removeOrphans(matches) {
		const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
		const wordsForDiffNew = this.newContentWords ?? this.newWords;
		const charCount = (words, from, to) => {
			let total = 0;
			for (let i = from; i < to; i++) total += words[i].length;
			return total;
		};
		let prev = new Match(0, 0, 0);
		let curr = null;
		for (const next of matches) {
			if (curr === null) {
				curr = next;
				continue;
			}
			const touchesPrev = prev.endInOld === curr.startInOld && prev.endInNew === curr.startInNew;
			const touchesNext = curr.endInOld === next.startInOld && curr.endInNew === next.startInNew;
			if (touchesPrev || touchesNext) {
				yield curr;
			} else {
				const oldDistanceInChars = charCount(wordsForDiffOld, prev.endInOld, next.startInOld);
				const newDistanceInChars = charCount(wordsForDiffNew, prev.endInNew, next.startInNew);
				const currMatchLengthInChars = charCount(wordsForDiffNew, curr.startInNew, curr.endInNew);
				if (currMatchLengthInChars > Math.max(oldDistanceInChars, newDistanceInChars) * this.orphanMatchThreshold) yield curr;
			}
			prev = curr;
			curr = next;
		}
		if (curr !== null) yield curr;
	}

	/** Returns all matching blocks of the two word lists, ordered by position. */
	matchingBlocks() {
		const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
		const wordsForDiffNew = this.newContentWords ?? this.newWords;
		const matchingBlocks = [];
		this.findMatchingBlocks(0, wordsForDiffOld.length, 0, wordsForDiffNew.length, matchingBlocks);
		return matchingBlocks;
	}

	/**
	 * Finds the best match inside the given ranges, then recurses into the
	 * ranges to its left and right; matches are appended to `matchingBlocks` in order.
	 */
	findMatchingBlocks(startInOld, endInOld, startInNew, endInNew, matchingBlocks) {
		const match = this.findMatch(startInOld, endInOld, startInNew, endInNew);
		if (match === null) return;
		if (startInOld < match.startInOld && startInNew < match.startInNew) this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks);
		matchingBlocks.push(match);
		if (match.endInOld < endInOld && match.endInNew < endInNew) this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks);
	}

	/**
	 * Looks for a match with decreasing block size, starting at {@link matchGranularity};
	 * returns the first one found or `null`.
	 */
	findMatch(startInOld, endInOld, startInNew, endInNew) {
		const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
		const wordsForDiffNew = this.newContentWords ?? this.newWords;
		for (let i = this.matchGranularity; i > 0; i--) {
			const match = new MatchFinder(wordsForDiffOld, wordsForDiffNew, startInOld, endInOld, startInNew, endInNew, {
				blockSize: i,
				repeatingWordsAccuracy: this.repeatingWordsAccuracy,
				ignoreWhitespaceDifferences: this.ignoreWhitespaceDifferences
			}).findMatch();
			if (match !== null) return match;
		}
		return null;
	}
};
840
+ //#endregion
841
+ export { HtmlDiff as default };
842
+
843
+ //# sourceMappingURL=HtmlDiff.mjs.map