@createiq/htmldiff 1.0.3 → 1.0.4-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HtmlDiff.cjs CHANGED
@@ -1,850 +1,852 @@
1
- "use strict";
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __hasOwnProp = Object.prototype.hasOwnProperty;
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
9
- };
10
- var __copyProps = (to, from, except, desc) => {
11
- if (from && typeof from === "object" || typeof from === "function") {
12
- for (let key of __getOwnPropNames(from))
13
- if (!__hasOwnProp.call(to, key) && key !== except)
14
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
- }
16
- return to;
17
- };
18
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
-
20
- // src/HtmlDiff.ts
21
- var HtmlDiff_exports = {};
22
- __export(HtmlDiff_exports, {
23
- default: () => HtmlDiff
24
- });
25
- module.exports = __toCommonJS(HtmlDiff_exports);
26
-
27
- // src/Action.ts
28
- var Action = /* @__PURE__ */ ((Action2) => {
29
- Action2[Action2["Equal"] = 0] = "Equal";
30
- Action2[Action2["Delete"] = 1] = "Delete";
31
- Action2[Action2["Insert"] = 2] = "Insert";
32
- Action2[Action2["None"] = 3] = "None";
33
- Action2[Action2["Replace"] = 4] = "Replace";
34
- return Action2;
35
- })(Action || {});
36
- var Action_default = Action;
37
-
38
- // src/Match.ts
1
+ //#region src/Match.ts
39
2
  var Match = class {
40
- _startInOld;
41
- _startInNew;
42
- _size;
43
- constructor(startInOld, startInNew, size) {
44
- this._startInOld = startInOld;
45
- this._startInNew = startInNew;
46
- this._size = size;
47
- }
48
- get startInOld() {
49
- return this._startInOld;
50
- }
51
- get startInNew() {
52
- return this._startInNew;
53
- }
54
- get size() {
55
- return this._size;
56
- }
57
- get endInOld() {
58
- return this._startInOld + this._size;
59
- }
60
- get endInNew() {
61
- return this._startInNew + this._size;
62
- }
3
+ _startInOld;
4
+ _startInNew;
5
+ _size;
6
+ constructor(startInOld, startInNew, size) {
7
+ this._startInOld = startInOld;
8
+ this._startInNew = startInNew;
9
+ this._size = size;
10
+ }
11
+ get startInOld() {
12
+ return this._startInOld;
13
+ }
14
+ get startInNew() {
15
+ return this._startInNew;
16
+ }
17
+ get size() {
18
+ return this._size;
19
+ }
20
+ get endInOld() {
21
+ return this._startInOld + this._size;
22
+ }
23
+ get endInNew() {
24
+ return this._startInNew + this._size;
25
+ }
63
26
  };
64
-
65
- // src/Utils.ts
66
- var openingTagRegex = /^\s*<[^>]+>\s*$/;
67
- var closingTagTexRegex = /^\s*<\/[^>]+>\s*$/;
68
- var tagWordRegex = /<[^\s>]+/;
69
- var whitespaceRegex = /^(\s|&nbsp;)+$/;
70
- var wordRegex = /[\w#@]+/;
71
- var tagRegex = /<\/?(?<name>[^\s/>]+)[^>]*>/;
72
- var SpecialCaseWordTags = ["<img"];
27
+ //#endregion
28
+ //#region src/Utils.ts
29
+ const openingTagRegex = /^\s*<[^>]+>\s*$/;
30
+ const closingTagTexRegex = /^\s*<\/[^>]+>\s*$/;
31
+ const tagWordRegex = /<[^\s>]+/;
32
+ const whitespaceRegex = /^(\s|&nbsp;)+$/;
33
+ const wordRegex = /[\w#@]+/;
34
+ const tagRegex = /<\/?(?<name>[^\s/>]+)[^>]*>/;
35
+ const SpecialCaseWordTags = ["<img"];
73
36
  function isTag(item) {
74
- if (SpecialCaseWordTags.some((re) => item?.startsWith(re))) {
75
- return false;
76
- }
77
- return isOpeningTag(item) || isClosingTag(item);
37
+ if (SpecialCaseWordTags.some((re) => item?.startsWith(re))) return false;
38
+ return isOpeningTag(item) || isClosingTag(item);
78
39
  }
79
40
  function isOpeningTag(item) {
80
- return openingTagRegex.test(item);
41
+ return openingTagRegex.test(item);
81
42
  }
82
43
  function isClosingTag(item) {
83
- return closingTagTexRegex.test(item);
44
+ return closingTagTexRegex.test(item);
84
45
  }
85
46
  function stripTagAttributes(word) {
86
- const match = tagWordRegex.exec(word);
87
- if (match) {
88
- return `${match[0]}${word.endsWith("/>") ? "/>" : ">"}`;
89
- }
90
- return word;
47
+ const match = tagWordRegex.exec(word);
48
+ if (match) return `${match[0]}${word.endsWith("/>") ? "/>" : ">"}`;
49
+ return word;
91
50
  }
92
51
  function wrapText(text, tagName, cssClass) {
93
- return `<${tagName} class='${cssClass}'>${text}</${tagName}>`;
52
+ return `<${tagName} class='${cssClass}'>${text}</${tagName}>`;
94
53
  }
95
54
  function isStartOfTag(val) {
96
- return val === "<";
55
+ return val === "<";
97
56
  }
98
57
  function isEndOfTag(val) {
99
- return val === ">";
58
+ return val === ">";
100
59
  }
101
60
  function isStartOfEntity(val) {
102
- return val === "&";
61
+ return val === "&";
103
62
  }
104
63
  function isEndOfEntity(val) {
105
- return val === ";";
64
+ return val === ";";
106
65
  }
107
66
  function isWhiteSpace(value) {
108
- return whitespaceRegex.test(value);
67
+ return whitespaceRegex.test(value);
109
68
  }
110
69
  function stripAnyAttributes(word) {
111
- if (isTag(word)) {
112
- return stripTagAttributes(word);
113
- }
114
- return word;
70
+ if (isTag(word)) return stripTagAttributes(word);
71
+ return word;
115
72
  }
116
73
  function isWord(text) {
117
- return wordRegex.test(text);
74
+ return wordRegex.test(text);
118
75
  }
119
76
  function getTagName(word) {
120
- if (word === null) {
121
- return "";
122
- }
123
- const match = tagRegex.exec(word);
124
- if (match) {
125
- return match.groups?.name.toLowerCase() ?? match[1].toLowerCase();
126
- }
127
- return "";
77
+ if (word === null) return "";
78
+ const match = tagRegex.exec(word);
79
+ if (match) return match.groups?.name.toLowerCase() ?? match[1].toLowerCase();
80
+ return "";
128
81
  }
129
82
  var Utils_default = {
130
- isTag,
131
- stripTagAttributes,
132
- wrapText,
133
- isStartOfTag,
134
- isEndOfTag,
135
- isStartOfEntity,
136
- isEndOfEntity,
137
- isWhiteSpace,
138
- stripAnyAttributes,
139
- isWord,
140
- getTagName
83
+ isTag,
84
+ stripTagAttributes,
85
+ wrapText,
86
+ isStartOfTag,
87
+ isEndOfTag,
88
+ isStartOfEntity,
89
+ isEndOfEntity,
90
+ isWhiteSpace,
91
+ stripAnyAttributes,
92
+ isWord,
93
+ getTagName
141
94
  };
142
-
143
- // src/MatchFinder.ts
144
- var MatchFinder = class _MatchFinder {
145
- oldWords;
146
- newWords;
147
- startInOld;
148
- endInOld;
149
- startInNew;
150
- endInNew;
151
- wordIndices = {};
152
- options;
153
- constructor(oldWords, newWords, startInOld, endInOld, startInNew, endInNew, options) {
154
- this.oldWords = oldWords;
155
- this.newWords = newWords;
156
- this.startInOld = startInOld;
157
- this.endInOld = endInOld;
158
- this.startInNew = startInNew;
159
- this.endInNew = endInNew;
160
- this.options = options;
161
- }
162
- indexNewWords() {
163
- this.wordIndices = {};
164
- const block = [];
165
- for (let i = this.startInNew; i < this.endInNew; i++) {
166
- const word = this.normalizeForIndex(this.newWords[i]);
167
- const key = _MatchFinder.putNewWord(block, word, this.options.blockSize);
168
- if (key === null) {
169
- continue;
170
- }
171
- if (!this.wordIndices[key]) {
172
- this.wordIndices[key] = [];
173
- }
174
- this.wordIndices[key].push(i);
175
- }
176
- }
177
- static putNewWord(block, word, blockSize) {
178
- block.push(word);
179
- if (block.length > blockSize) {
180
- block.shift();
181
- }
182
- if (block.length !== blockSize) {
183
- return null;
184
- }
185
- return block.join("");
186
- }
187
- normalizeForIndex(word) {
188
- const output = Utils_default.stripAnyAttributes(word);
189
- if (this.options.ignoreWhitespaceDifferences && Utils_default.isWhiteSpace(output)) {
190
- return " ";
191
- }
192
- return output;
193
- }
194
- findMatch() {
195
- this.indexNewWords();
196
- this.removeRepeatingWords();
197
- let hasIndices = false;
198
- for (const _key in this.wordIndices) {
199
- hasIndices = true;
200
- break;
201
- }
202
- if (!hasIndices) {
203
- return null;
204
- }
205
- let bestMatchInOld = this.startInOld;
206
- let bestMatchInNew = this.startInNew;
207
- let bestMatchSize = 0;
208
- let matchLengthAt = /* @__PURE__ */ new Map();
209
- const block = [];
210
- for (let indexInOld = this.startInOld; indexInOld < this.endInOld; indexInOld++) {
211
- const word = this.normalizeForIndex(this.oldWords[indexInOld]);
212
- const index = _MatchFinder.putNewWord(block, word, this.options.blockSize);
213
- if (index === null) {
214
- continue;
215
- }
216
- const newMatchLengthAt = /* @__PURE__ */ new Map();
217
- if (!this.wordIndices[index]) {
218
- matchLengthAt = newMatchLengthAt;
219
- continue;
220
- }
221
- for (const indexInNew of this.wordIndices[index]) {
222
- const newMatchLength = (matchLengthAt.has(indexInNew - 1) ? matchLengthAt.get(indexInNew - 1) : 0) + 1;
223
- newMatchLengthAt.set(indexInNew, newMatchLength);
224
- if (newMatchLength > bestMatchSize) {
225
- bestMatchInOld = indexInOld - newMatchLength - this.options.blockSize + 2;
226
- bestMatchInNew = indexInNew - newMatchLength - this.options.blockSize + 2;
227
- bestMatchSize = newMatchLength;
228
- }
229
- }
230
- matchLengthAt = newMatchLengthAt;
231
- }
232
- return bestMatchSize !== 0 ? new Match(bestMatchInOld, bestMatchInNew, bestMatchSize + this.options.blockSize - 1) : null;
233
- }
234
- /**
235
- * This method removes words that occur too many times. This way it reduces total count of comparison operations
236
- * and as result the diff algorithm takes less time. But the side effect is that it may detect false differences of
237
- * the repeating words.
238
- * @private
239
- */
240
- removeRepeatingWords() {
241
- const threshold = this.newWords.length * this.options.repeatingWordsAccuracy;
242
- const repeatingWords = Object.entries(this.wordIndices).filter(([, indices]) => indices.length > threshold).map(([word]) => word);
243
- for (const w of repeatingWords) {
244
- delete this.wordIndices[w];
245
- }
246
- }
95
+ //#endregion
96
+ //#region src/MatchFinder.ts
97
+ /**
98
+ * Finds the longest match in given texts. It uses indexing with fixed granularity that is used to compare blocks of text.
99
+ */
100
+ var MatchFinder = class MatchFinder {
101
+ oldWords;
102
+ newWords;
103
+ startInOld;
104
+ endInOld;
105
+ startInNew;
106
+ endInNew;
107
+ wordIndices = {};
108
+ options;
109
+ constructor(oldWords, newWords, startInOld, endInOld, startInNew, endInNew, options) {
110
+ this.oldWords = oldWords;
111
+ this.newWords = newWords;
112
+ this.startInOld = startInOld;
113
+ this.endInOld = endInOld;
114
+ this.startInNew = startInNew;
115
+ this.endInNew = endInNew;
116
+ this.options = options;
117
+ }
118
+ indexNewWords() {
119
+ this.wordIndices = {};
120
+ const block = [];
121
+ for (let i = this.startInNew; i < this.endInNew; i++) {
122
+ const word = this.normalizeForIndex(this.newWords[i]);
123
+ const key = MatchFinder.putNewWord(block, word, this.options.blockSize);
124
+ if (key === null) continue;
125
+ if (!this.wordIndices[key]) this.wordIndices[key] = [];
126
+ this.wordIndices[key].push(i);
127
+ }
128
+ }
129
+ static putNewWord(block, word, blockSize) {
130
+ block.push(word);
131
+ if (block.length > blockSize) block.shift();
132
+ if (block.length !== blockSize) return null;
133
+ return block.join("");
134
+ }
135
+ normalizeForIndex(word) {
136
+ const output = Utils_default.stripAnyAttributes(word);
137
+ if (this.options.ignoreWhitespaceDifferences && Utils_default.isWhiteSpace(output)) return " ";
138
+ return output;
139
+ }
140
+ findMatch() {
141
+ this.indexNewWords();
142
+ this.removeRepeatingWords();
143
+ let hasIndices = false;
144
+ for (const _key in this.wordIndices) {
145
+ hasIndices = true;
146
+ break;
147
+ }
148
+ if (!hasIndices) return null;
149
+ let bestMatchInOld = this.startInOld;
150
+ let bestMatchInNew = this.startInNew;
151
+ let bestMatchSize = 0;
152
+ let matchLengthAt = /* @__PURE__ */ new Map();
153
+ const block = [];
154
+ for (let indexInOld = this.startInOld; indexInOld < this.endInOld; indexInOld++) {
155
+ const word = this.normalizeForIndex(this.oldWords[indexInOld]);
156
+ const index = MatchFinder.putNewWord(block, word, this.options.blockSize);
157
+ if (index === null) continue;
158
+ const newMatchLengthAt = /* @__PURE__ */ new Map();
159
+ if (!this.wordIndices[index]) {
160
+ matchLengthAt = newMatchLengthAt;
161
+ continue;
162
+ }
163
+ for (const indexInNew of this.wordIndices[index]) {
164
+ const newMatchLength = (matchLengthAt.has(indexInNew - 1) ? matchLengthAt.get(indexInNew - 1) : 0) + 1;
165
+ newMatchLengthAt.set(indexInNew, newMatchLength);
166
+ if (newMatchLength > bestMatchSize) {
167
+ bestMatchInOld = indexInOld - newMatchLength - this.options.blockSize + 2;
168
+ bestMatchInNew = indexInNew - newMatchLength - this.options.blockSize + 2;
169
+ bestMatchSize = newMatchLength;
170
+ }
171
+ }
172
+ matchLengthAt = newMatchLengthAt;
173
+ }
174
+ return bestMatchSize !== 0 ? new Match(bestMatchInOld, bestMatchInNew, bestMatchSize + this.options.blockSize - 1) : null;
175
+ }
176
+ /**
177
+ * This method removes words that occur too many times. This way it reduces total count of comparison operations
178
+ * and as result the diff algorithm takes less time. But the side effect is that it may detect false differences of
179
+ * the repeating words.
180
+ * @private
181
+ */
182
+ removeRepeatingWords() {
183
+ const threshold = this.newWords.length * this.options.repeatingWordsAccuracy;
184
+ const repeatingWords = Object.entries(this.wordIndices).filter(([, indices]) => indices.length > threshold).map(([word]) => word);
185
+ for (const w of repeatingWords) delete this.wordIndices[w];
186
+ }
247
187
  };
248
-
249
- // src/Operation.ts
188
+ //#endregion
189
+ //#region src/Operation.ts
250
190
  var Operation = class {
251
- action;
252
- startInOld;
253
- endInOld;
254
- startInNew;
255
- endInNew;
256
- constructor(action, startInOld, endInOld, startInNew, endInNew) {
257
- this.action = action;
258
- this.startInOld = startInOld;
259
- this.endInOld = endInOld;
260
- this.startInNew = startInNew;
261
- this.endInNew = endInNew;
262
- }
191
+ action;
192
+ startInOld;
193
+ endInOld;
194
+ startInNew;
195
+ endInNew;
196
+ constructor(action, startInOld, endInOld, startInNew, endInNew) {
197
+ this.action = action;
198
+ this.startInOld = startInOld;
199
+ this.endInOld = endInOld;
200
+ this.startInNew = startInNew;
201
+ this.endInNew = endInNew;
202
+ }
263
203
  };
264
-
265
- // src/Mode.ts
266
- var Mode = /* @__PURE__ */ ((Mode2) => {
267
- Mode2[Mode2["Character"] = 0] = "Character";
268
- Mode2[Mode2["Tag"] = 1] = "Tag";
269
- Mode2[Mode2["Whitespace"] = 2] = "Whitespace";
270
- Mode2[Mode2["Entity"] = 3] = "Entity";
271
- return Mode2;
272
- })(Mode || {});
273
- var Mode_default = Mode;
274
-
275
- // src/WordSplitter.ts
276
- var WordSplitter = class _WordSplitter {
277
- text;
278
- isBlockCheckRequired;
279
- blockLocations;
280
- mode;
281
- isGrouping = false;
282
- globbingUntil;
283
- currentWord;
284
- words;
285
- static NotGlobbing = -1;
286
- get currentWordHasChars() {
287
- return this.currentWord.length > 0;
288
- }
289
- constructor(text, blockExpressions) {
290
- this.text = text;
291
- this.blockLocations = new BlockFinder(text, blockExpressions).findBlocks();
292
- this.isBlockCheckRequired = this.blockLocations.hasBlocks;
293
- this.mode = Mode_default.Character;
294
- this.globbingUntil = _WordSplitter.NotGlobbing;
295
- this.currentWord = [];
296
- this.words = [];
297
- }
298
- process() {
299
- for (let index = 0; index < this.text.length; index++) {
300
- const character = this.text.charAt(index);
301
- this.processCharacter(index, character);
302
- }
303
- this.appendCurrentWordToWords();
304
- return this.words;
305
- }
306
- processCharacter(index, character) {
307
- if (this.isGlobbing(index, character)) {
308
- return;
309
- }
310
- switch (this.mode) {
311
- case Mode_default.Character:
312
- this.processTextCharacter(character);
313
- break;
314
- case Mode_default.Tag:
315
- this.processHtmlTagContinuation(character);
316
- break;
317
- case Mode_default.Whitespace:
318
- this.processWhiteSpaceContinuation(character);
319
- break;
320
- case Mode_default.Entity:
321
- this.processEntityContinuation(character);
322
- break;
323
- }
324
- }
325
- processEntityContinuation(character) {
326
- if (Utils_default.isStartOfTag(character)) {
327
- this.appendCurrentWordToWords();
328
- this.currentWord.push(character);
329
- this.mode = Mode_default.Tag;
330
- } else if (character.trim().length === 0) {
331
- this.appendCurrentWordToWords();
332
- this.currentWord.push(character);
333
- this.mode = Mode_default.Whitespace;
334
- } else if (Utils_default.isEndOfEntity(character)) {
335
- let switchToNextMode = true;
336
- if (this.currentWordHasChars) {
337
- this.currentWord.push(character);
338
- this.words.push(this.currentWord.join(""));
339
- if (this.words.length > 2 && Utils_default.isWhiteSpace(this.words[this.words.length - 2]) && Utils_default.isWhiteSpace(this.words[this.words.length - 1])) {
340
- const w1 = this.words[this.words.length - 2];
341
- const w2 = this.words[this.words.length - 1];
342
- this.words.splice(this.words.length - 2, 2);
343
- this.currentWord = `${w1}${w2}`.split("");
344
- this.mode = Mode_default.Whitespace;
345
- switchToNextMode = false;
346
- }
347
- }
348
- if (switchToNextMode) {
349
- this.currentWord = [];
350
- this.mode = Mode_default.Character;
351
- }
352
- } else if (Utils_default.isWord(character)) {
353
- this.currentWord.push(character);
354
- } else {
355
- this.appendCurrentWordToWords();
356
- this.currentWord.push(character);
357
- this.mode = Mode_default.Character;
358
- }
359
- }
360
- processWhiteSpaceContinuation(character) {
361
- if (Utils_default.isStartOfTag(character)) {
362
- this.appendCurrentWordToWords();
363
- this.currentWord.push(character);
364
- this.mode = Mode_default.Tag;
365
- } else if (Utils_default.isStartOfEntity(character)) {
366
- this.appendCurrentWordToWords();
367
- this.currentWord.push(character);
368
- this.mode = Mode_default.Entity;
369
- } else if (Utils_default.isWhiteSpace(character)) {
370
- this.currentWord.push(character);
371
- } else {
372
- this.appendCurrentWordToWords();
373
- this.currentWord.push(character);
374
- this.mode = Mode_default.Character;
375
- }
376
- }
377
- processHtmlTagContinuation(character) {
378
- if (Utils_default.isEndOfTag(character)) {
379
- this.currentWord.push(character);
380
- this.appendCurrentWordToWords();
381
- this.mode = Utils_default.isWhiteSpace(character) ? Mode_default.Whitespace : Mode_default.Character;
382
- } else {
383
- this.currentWord.push(character);
384
- }
385
- }
386
- processTextCharacter(character) {
387
- if (Utils_default.isStartOfTag(character)) {
388
- this.appendCurrentWordToWords();
389
- this.currentWord.push("<");
390
- this.mode = Mode_default.Tag;
391
- } else if (Utils_default.isStartOfEntity(character)) {
392
- this.appendCurrentWordToWords();
393
- this.currentWord.push(character);
394
- this.mode = Mode_default.Entity;
395
- } else if (Utils_default.isWhiteSpace(character)) {
396
- this.appendCurrentWordToWords();
397
- this.currentWord.push(character);
398
- this.mode = Mode_default.Whitespace;
399
- } else if (Utils_default.isWord(character) && (this.currentWord.length === 0 || Utils_default.isWord(this.currentWord[this.currentWord.length - 1]))) {
400
- this.currentWord.push(character);
401
- } else {
402
- this.appendCurrentWordToWords();
403
- this.currentWord.push(character);
404
- }
405
- }
406
- appendCurrentWordToWords() {
407
- if (this.currentWordHasChars) {
408
- this.words.push(this.currentWord.join(""));
409
- this.currentWord = [];
410
- }
411
- }
412
- isGlobbing(index, character) {
413
- if (!this.isBlockCheckRequired) {
414
- return false;
415
- }
416
- const isCurrentBlockTerminating = index === this.globbingUntil;
417
- if (isCurrentBlockTerminating) {
418
- this.globbingUntil = _WordSplitter.NotGlobbing;
419
- this.isGrouping = false;
420
- this.appendCurrentWordToWords();
421
- }
422
- const until = this.blockLocations.isInBlock(index);
423
- if (until) {
424
- this.isGrouping = true;
425
- this.globbingUntil = until;
426
- }
427
- if (this.isGrouping) {
428
- this.currentWord.push(character);
429
- this.mode = Mode_default.Character;
430
- }
431
- return this.isGrouping;
432
- }
433
- static convertHtmlToListOfWords(text, blockExpressions) {
434
- return new _WordSplitter(text, blockExpressions).process();
435
- }
204
+ //#endregion
205
+ //#region src/WordSplitter.ts
206
+ var WordSplitter = class WordSplitter {
207
+ text;
208
+ isBlockCheckRequired;
209
+ blockLocations;
210
+ mode;
211
+ isGrouping = false;
212
+ globbingUntil;
213
+ currentWord;
214
+ words;
215
+ static NotGlobbing = -1;
216
+ get currentWordHasChars() {
217
+ return this.currentWord.length > 0;
218
+ }
219
+ constructor(text, blockExpressions) {
220
+ this.text = text;
221
+ this.blockLocations = new BlockFinder(text, blockExpressions).findBlocks();
222
+ this.isBlockCheckRequired = this.blockLocations.hasBlocks;
223
+ this.mode = 0;
224
+ this.globbingUntil = WordSplitter.NotGlobbing;
225
+ this.currentWord = [];
226
+ this.words = [];
227
+ }
228
+ process() {
229
+ for (let index = 0; index < this.text.length; index++) {
230
+ const character = this.text.charAt(index);
231
+ this.processCharacter(index, character);
232
+ }
233
+ this.appendCurrentWordToWords();
234
+ return this.words;
235
+ }
236
+ processCharacter(index, character) {
237
+ if (this.isGlobbing(index, character)) return;
238
+ switch (this.mode) {
239
+ case 0:
240
+ this.processTextCharacter(character);
241
+ break;
242
+ case 1:
243
+ this.processHtmlTagContinuation(character);
244
+ break;
245
+ case 2:
246
+ this.processWhiteSpaceContinuation(character);
247
+ break;
248
+ case 3:
249
+ this.processEntityContinuation(character);
250
+ break;
251
+ }
252
+ }
253
+ processEntityContinuation(character) {
254
+ if (Utils_default.isStartOfTag(character)) {
255
+ this.appendCurrentWordToWords();
256
+ this.currentWord.push(character);
257
+ this.mode = 1;
258
+ } else if (character.trim().length === 0) {
259
+ this.appendCurrentWordToWords();
260
+ this.currentWord.push(character);
261
+ this.mode = 2;
262
+ } else if (Utils_default.isEndOfEntity(character)) {
263
+ let switchToNextMode = true;
264
+ if (this.currentWordHasChars) {
265
+ this.currentWord.push(character);
266
+ this.words.push(this.currentWord.join(""));
267
+ if (this.words.length > 2 && Utils_default.isWhiteSpace(this.words[this.words.length - 2]) && Utils_default.isWhiteSpace(this.words[this.words.length - 1])) {
268
+ const w1 = this.words[this.words.length - 2];
269
+ const w2 = this.words[this.words.length - 1];
270
+ this.words.splice(this.words.length - 2, 2);
271
+ this.currentWord = `${w1}${w2}`.split("");
272
+ this.mode = 2;
273
+ switchToNextMode = false;
274
+ }
275
+ }
276
+ if (switchToNextMode) {
277
+ this.currentWord = [];
278
+ this.mode = 0;
279
+ }
280
+ } else if (Utils_default.isWord(character)) this.currentWord.push(character);
281
+ else {
282
+ this.appendCurrentWordToWords();
283
+ this.currentWord.push(character);
284
+ this.mode = 0;
285
+ }
286
+ }
287
+ processWhiteSpaceContinuation(character) {
288
+ if (Utils_default.isStartOfTag(character)) {
289
+ this.appendCurrentWordToWords();
290
+ this.currentWord.push(character);
291
+ this.mode = 1;
292
+ } else if (Utils_default.isStartOfEntity(character)) {
293
+ this.appendCurrentWordToWords();
294
+ this.currentWord.push(character);
295
+ this.mode = 3;
296
+ } else if (Utils_default.isWhiteSpace(character)) this.currentWord.push(character);
297
+ else {
298
+ this.appendCurrentWordToWords();
299
+ this.currentWord.push(character);
300
+ this.mode = 0;
301
+ }
302
+ }
303
+ processHtmlTagContinuation(character) {
304
+ if (Utils_default.isEndOfTag(character)) {
305
+ this.currentWord.push(character);
306
+ this.appendCurrentWordToWords();
307
+ this.mode = Utils_default.isWhiteSpace(character) ? 2 : 0;
308
+ } else this.currentWord.push(character);
309
+ }
310
+ processTextCharacter(character) {
311
+ if (Utils_default.isStartOfTag(character)) {
312
+ this.appendCurrentWordToWords();
313
+ this.currentWord.push("<");
314
+ this.mode = 1;
315
+ } else if (Utils_default.isStartOfEntity(character)) {
316
+ this.appendCurrentWordToWords();
317
+ this.currentWord.push(character);
318
+ this.mode = 3;
319
+ } else if (Utils_default.isWhiteSpace(character)) {
320
+ this.appendCurrentWordToWords();
321
+ this.currentWord.push(character);
322
+ this.mode = 2;
323
+ } else if (Utils_default.isWord(character) && (this.currentWord.length === 0 || Utils_default.isWord(this.currentWord[this.currentWord.length - 1]))) this.currentWord.push(character);
324
+ else {
325
+ this.appendCurrentWordToWords();
326
+ this.currentWord.push(character);
327
+ }
328
+ }
329
+ appendCurrentWordToWords() {
330
+ if (this.currentWordHasChars) {
331
+ this.words.push(this.currentWord.join(""));
332
+ this.currentWord = [];
333
+ }
334
+ }
335
+ isGlobbing(index, character) {
336
+ if (!this.isBlockCheckRequired) return false;
337
+ if (index === this.globbingUntil) {
338
+ this.globbingUntil = WordSplitter.NotGlobbing;
339
+ this.isGrouping = false;
340
+ this.appendCurrentWordToWords();
341
+ }
342
+ const until = this.blockLocations.isInBlock(index);
343
+ if (until) {
344
+ this.isGrouping = true;
345
+ this.globbingUntil = until;
346
+ }
347
+ if (this.isGrouping) {
348
+ this.currentWord.push(character);
349
+ this.mode = 0;
350
+ }
351
+ return this.isGrouping;
352
+ }
353
+ static convertHtmlToListOfWords(text, blockExpressions) {
354
+ return new WordSplitter(text, blockExpressions).process();
355
+ }
436
356
  };
437
357
  var BlockFinderResult = class {
438
- blocks = /* @__PURE__ */ new Map();
439
- addBlock(from, to) {
440
- if (this.blocks.has(from)) {
441
- throw new ArgumentError("One or more block expressions result in a text sequence that overlaps.");
442
- }
443
- this.blocks.set(from, to);
444
- }
445
- isInBlock(location) {
446
- return this.blocks.get(location) ?? null;
447
- }
448
- get hasBlocks() {
449
- return this.blocks.size > 0;
450
- }
451
- };
452
- var ArgumentError = class extends Error {
358
+ blocks = /* @__PURE__ */ new Map();
359
+ addBlock(from, to) {
360
+ if (this.blocks.has(from)) throw new ArgumentError("One or more block expressions result in a text sequence that overlaps.");
361
+ this.blocks.set(from, to);
362
+ }
363
+ isInBlock(location) {
364
+ return this.blocks.get(location) ?? null;
365
+ }
366
+ get hasBlocks() {
367
+ return this.blocks.size > 0;
368
+ }
453
369
  };
370
+ var ArgumentError = class extends Error {};
454
371
  var BlockFinder = class {
455
- text;
456
- blockExpressions;
457
- constructor(text, blockExpressions) {
458
- this.text = text;
459
- this.blockExpressions = blockExpressions;
460
- }
461
- findBlocks() {
462
- const result = new BlockFinderResult();
463
- for (const expression of this.blockExpressions) {
464
- this.processBlockMatcher(expression, result);
465
- }
466
- return result;
467
- }
468
- processBlockMatcher(exp, result) {
469
- let match;
470
- while ((match = exp.exec(this.text)) !== null) {
471
- this.tryAddBlock(exp, match, result);
472
- }
473
- }
474
- tryAddBlock(exp, match, result) {
475
- try {
476
- const from = match.index;
477
- const to = match.index + match[0].length;
478
- result.addBlock(from, to);
479
- } catch {
480
- throw new ArgumentError(
481
- `One or more block expressions result in a text sequence that overlaps. Current expression: ${exp}`
482
- );
483
- }
484
- }
372
+ text;
373
+ blockExpressions;
374
+ constructor(text, blockExpressions) {
375
+ this.text = text;
376
+ this.blockExpressions = blockExpressions;
377
+ }
378
+ findBlocks() {
379
+ const result = new BlockFinderResult();
380
+ for (const expression of this.blockExpressions) this.processBlockMatcher(expression, result);
381
+ return result;
382
+ }
383
+ processBlockMatcher(exp, result) {
384
+ let match;
385
+ while ((match = exp.exec(this.text)) !== null) this.tryAddBlock(exp, match, result);
386
+ }
387
+ tryAddBlock(exp, match, result) {
388
+ try {
389
+ const from = match.index;
390
+ const to = match.index + match[0].length;
391
+ result.addBlock(from, to);
392
+ } catch {
393
+ throw new ArgumentError(`One or more block expressions result in a text sequence that overlaps. Current expression: ${exp}`);
394
+ }
395
+ }
485
396
  };
486
-
487
- // src/HtmlDiff.ts
488
- var HtmlDiff = class _HtmlDiff {
489
- /**
490
- * This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
491
- * @private
492
- */
493
- static MatchGranularityMaximum = 4;
494
- static DelTag = "del";
495
- static InsTag = "ins";
496
- // ignore case
497
- static SpecialCaseClosingTags = [
498
- "</strong>",
499
- "</em>",
500
- "</b>",
501
- "</i>",
502
- "</big>",
503
- "</small>",
504
- "</u>",
505
- "</sub>",
506
- "</sup>",
507
- "</strike>",
508
- "</s>",
509
- "</span>"
510
- ];
511
- static SpecialCaseClosingTagsSet = /* @__PURE__ */ new Set([
512
- "</strong>",
513
- "</em>",
514
- "</b>",
515
- "</i>",
516
- "</big>",
517
- "</small>",
518
- "</u>",
519
- "</sub>",
520
- "</sup>",
521
- "</strike>",
522
- "</s>",
523
- "</span>"
524
- ]);
525
- static SpecialCaseOpeningTagRegex = /<((strong)|(b)|(i)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s)|(span))[>\s]+/i;
526
- content = [];
527
- newText;
528
- oldText;
529
- specialTagDiffStack = [];
530
- newWords = [];
531
- oldWords = [];
532
- matchGranularity = 0;
533
- blockExpressions = [];
534
- /**
535
- * Defines how to compare repeating words. Valid values are from 0 to 1.
536
- * This value allows to exclude some words from comparison that eventually
537
- * reduces the total time of the diff algorithm.
538
- * 0 means that all words are excluded so the diff will not find any matching words at all.
539
- * 1 (default value) means that all words participate in comparison so this is the most accurate case.
540
- * 0.5 means that any word that occurs more than 50% times may be excluded from comparison. This doesn't
541
- * mean that such words will definitely be excluded but only gives a permission to exclude them if necessary.
542
- */
543
- repeatingWordsAccuracy = 1;
544
- /**
545
- * If true all whitespaces are considered as equal
546
- */
547
- ignoreWhitespaceDifferences = false;
548
- /**
549
- * If some match is too small and located far from its neighbors then it is considered as orphan
550
- * and removed. For example:
551
- * <code>
552
- * aaaaa bb ccccccccc dddddd ee
553
- * 11111 bb 222222222 dddddd ee
554
- * </code>
555
- * will find two matches <code>bb</code> and <code>dddddd ee</code> but the first will be considered
556
- * as orphan and ignored, as result it will consider texts <code>aaaaa bb ccccccccc</code> and
557
- * <code>11111 bb 222222222</code> as single replacement:
558
- * <code>
559
- * &lt;del&gt;aaaaa bb ccccccccc&lt;/del&gt;&lt;ins&gt;11111 bb 222222222&lt;/ins&gt; dddddd ee
560
- * </code>
561
- * This property defines relative size of the match to be considered as orphan, from 0 to 1.
562
- * 1 means that all matches will be considered as orphans.
563
- * 0 (default) means that no match will be considered as orphan.
564
- * 0.2 means that if match length is less than 20% of distance between its neighbors it is considered as orphan.
565
- */
566
- orphanMatchThreshold = 0;
567
- /**
568
- * Initializes a new instance of the class.
569
- * @param oldText The old text.
570
- * @param newText The new text.
571
- */
572
- constructor(oldText, newText) {
573
- this.oldText = oldText;
574
- this.newText = newText;
575
- }
576
- static execute(oldText, newText) {
577
- return new _HtmlDiff(oldText, newText).build();
578
- }
579
- /**
580
- * Builds the HTML diff output
581
- * @return HTML diff markup
582
- */
583
- build() {
584
- if (this.oldText === this.newText) {
585
- return this.newText;
586
- }
587
- this.splitInputsToWords();
588
- this.matchGranularity = Math.min(
589
- _HtmlDiff.MatchGranularityMaximum,
590
- Math.min(this.oldWords.length, this.newWords.length)
591
- );
592
- const operations = this.operations();
593
- for (const op of operations) {
594
- this.performOperation(op);
595
- }
596
- return this.content.join("");
597
- }
598
- /**
599
- * Uses {@link expression} to group text together so that any change detected within the group is treated as a single block
600
- * @param expression
601
- */
602
- addBlockExpression(expression) {
603
- this.blockExpressions.push(expression);
604
- }
605
- splitInputsToWords() {
606
- this.oldWords = WordSplitter.convertHtmlToListOfWords(this.oldText, this.blockExpressions);
607
- this.oldText = "";
608
- this.newWords = WordSplitter.convertHtmlToListOfWords(this.newText, this.blockExpressions);
609
- this.newText = "";
610
- }
611
- performOperation(operation) {
612
- switch (operation.action) {
613
- case Action_default.Equal:
614
- this.processEqualOperation(operation);
615
- break;
616
- case Action_default.Delete:
617
- this.processDeleteOperation(operation, "diffdel");
618
- break;
619
- case Action_default.Insert:
620
- this.processInsertOperation(operation, "diffins");
621
- break;
622
- case Action_default.None:
623
- break;
624
- case Action_default.Replace:
625
- this.processReplaceOperation(operation);
626
- break;
627
- }
628
- }
629
- processReplaceOperation(operation) {
630
- this.processDeleteOperation(operation, "diffmod");
631
- this.processInsertOperation(operation, "diffmod");
632
- }
633
- processInsertOperation(operation, cssClass) {
634
- const text = this.newWords.slice(operation.startInNew, operation.endInNew);
635
- this.insertTag(_HtmlDiff.InsTag, cssClass, text);
636
- }
637
- processDeleteOperation(operation, cssClass) {
638
- const text = this.oldWords.slice(operation.startInOld, operation.endInOld);
639
- this.insertTag(_HtmlDiff.DelTag, cssClass, text);
640
- }
641
- processEqualOperation(operation) {
642
- const result = this.newWords.slice(operation.startInNew, operation.endInNew);
643
- this.content.push(result.join(""));
644
- }
645
- /**
646
- * This method encloses words within a specified tag (ins or del), and adds this into "content",
647
- * with a twist: if there are words contain tags, it actually creates multiple ins or del,
648
- * so that they don't include any ins or del. This handles cases like
649
- * old: '<p>a</p>'
650
- * new: '<p>ab</p>
651
- * <p>
652
- * c</b>'
653
- * diff result: '<p>a<ins>b</ins></p>
654
- * <p>
655
- * <ins>c</ins>
656
- * </p>
657
- * '
658
- * this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
659
- * del tags), but handles correctly more cases than the earlier version.
660
- * P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
661
- * @param tag
662
- * @param cssClass
663
- * @param words
664
- * @private
665
- */
666
- insertTag(tag, cssClass, words) {
667
- while (true) {
668
- if (words.length === 0) {
669
- break;
670
- }
671
- const allWordsUntilFirstTag = this.extractConsecutiveWords(words, (x) => !Utils_default.isTag(x));
672
- if (allWordsUntilFirstTag.length > 0) {
673
- const text = Utils_default.wrapText(allWordsUntilFirstTag.join(""), tag, cssClass);
674
- this.content.push(text);
675
- }
676
- const isInsertOpCompleted = words.length === 0;
677
- if (isInsertOpCompleted) {
678
- break;
679
- }
680
- const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
681
- const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
682
- let specialCaseTagInjection = "";
683
- let specialCaseTagInjectionIsBefore = false;
684
- if (_HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
685
- const tagNames = /* @__PURE__ */ new Set();
686
- for (const word of words) {
687
- if (Utils_default.isTag(word)) {
688
- tagNames.add(Utils_default.getTagName(word));
689
- }
690
- }
691
- const styledTagNames = Array.from(tagNames).join(" ");
692
- this.specialTagDiffStack.push(words[0]);
693
- specialCaseTagInjection = `<ins class='mod ${styledTagNames}'>`;
694
- if (tag === _HtmlDiff.DelTag) {
695
- words.shift();
696
- while (words.length > 0 && _HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
697
- words.shift();
698
- }
699
- }
700
- } else if (_HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
701
- const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
702
- const openingAndClosingTagsMatch = !!openingTag && Utils_default.getTagName(openingTag) === Utils_default.getTagName(words[indexLastTagInFirstTagBlock]);
703
- if (!!openingTag && openingAndClosingTagsMatch) {
704
- specialCaseTagInjection = "</ins>";
705
- specialCaseTagInjectionIsBefore = true;
706
- } else if (openingTag) {
707
- this.specialTagDiffStack.push(openingTag);
708
- }
709
- if (tag === _HtmlDiff.DelTag) {
710
- words.shift();
711
- while (words.length > 0 && _HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
712
- words.shift();
713
- }
714
- }
715
- }
716
- if (words.length === 0 && specialCaseTagInjection.length === 0) {
717
- break;
718
- }
719
- if (specialCaseTagInjectionIsBefore) {
720
- this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, Utils_default.isTag).join(""));
721
- } else {
722
- this.content.push(this.extractConsecutiveWords(words, Utils_default.isTag).join("") + specialCaseTagInjection);
723
- }
724
- if (words.length === 0) continue;
725
- this.insertTag(tag, cssClass, words);
726
- break;
727
- }
728
- }
729
- extractConsecutiveWords(words, condition) {
730
- let indexOfFirstTag = null;
731
- for (let i = 0; i < words.length; i++) {
732
- const word = words[i];
733
- if (i === 0 && word === " ") {
734
- words[i] = "&nbsp;";
735
- }
736
- if (!condition(word)) {
737
- indexOfFirstTag = i;
738
- break;
739
- }
740
- }
741
- if (indexOfFirstTag !== null) {
742
- const items2 = words.slice(0, indexOfFirstTag);
743
- if (indexOfFirstTag > 0) {
744
- words.splice(0, indexOfFirstTag);
745
- }
746
- return items2;
747
- }
748
- const items = words.slice(0);
749
- words.splice(0, words.length);
750
- return items;
751
- }
752
- operations() {
753
- let positionInOld = 0;
754
- let positionInNew = 0;
755
- const operations = [];
756
- const matches = this.matchingBlocks();
757
- matches.push(new Match(this.oldWords.length, this.newWords.length, 0));
758
- const matchesWithoutOrphans = this.removeOrphans(matches);
759
- for (const match of matchesWithoutOrphans) {
760
- const matchStartsAtCurrentPositionInOld = positionInOld === match.startInOld;
761
- const matchStartsAtCurrentPositionInNew = positionInNew === match.startInNew;
762
- let action;
763
- if (!matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {
764
- action = Action_default.Replace;
765
- } else if (matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {
766
- action = Action_default.Insert;
767
- } else if (!matchStartsAtCurrentPositionInOld) {
768
- action = Action_default.Delete;
769
- } else {
770
- action = Action_default.None;
771
- }
772
- if (action !== Action_default.None) {
773
- operations.push(new Operation(action, positionInOld, match.startInOld, positionInNew, match.startInNew));
774
- }
775
- if (match.size !== 0) {
776
- operations.push(new Operation(Action_default.Equal, match.startInOld, match.endInOld, match.startInNew, match.endInNew));
777
- }
778
- positionInOld = match.endInOld;
779
- positionInNew = match.endInNew;
780
- }
781
- return operations;
782
- }
783
- *removeOrphans(matches) {
784
- let prev = new Match(0, 0, 0);
785
- let curr = null;
786
- for (const next of matches) {
787
- if (curr === null) {
788
- curr = next;
789
- continue;
790
- }
791
- if (prev.endInOld === curr.startInOld && prev.endInNew === curr.startInNew || curr.endInOld === next.startInOld && curr.endInNew === next.startInNew) {
792
- yield curr;
793
- prev = curr;
794
- curr = next;
795
- continue;
796
- }
797
- let oldDistanceInChars = 0;
798
- for (let i = prev.endInOld; i < next.startInOld; i++) {
799
- oldDistanceInChars += this.oldWords[i].length;
800
- }
801
- let newDistanceInChars = 0;
802
- for (let i = prev.endInNew; i < next.startInNew; i++) {
803
- newDistanceInChars += this.newWords[i].length;
804
- }
805
- let currMatchLengthInChars = 0;
806
- for (let i = curr.startInNew; i < curr.endInNew; i++) {
807
- currMatchLengthInChars += this.newWords[i].length;
808
- }
809
- if (currMatchLengthInChars > Math.max(oldDistanceInChars, newDistanceInChars) * this.orphanMatchThreshold) {
810
- yield curr;
811
- }
812
- prev = curr;
813
- curr = next;
814
- }
815
- if (curr !== null) {
816
- yield curr;
817
- }
818
- }
819
- matchingBlocks() {
820
- const matchingBlocks = [];
821
- this.findMatchingBlocks(0, this.oldWords.length, 0, this.newWords.length, matchingBlocks);
822
- return matchingBlocks;
823
- }
824
- findMatchingBlocks(startInOld, endInOld, startInNew, endInNew, matchingBlocks) {
825
- const match = this.findMatch(startInOld, endInOld, startInNew, endInNew);
826
- if (match !== null) {
827
- if (startInOld < match.startInOld && startInNew < match.startInNew) {
828
- this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks);
829
- }
830
- matchingBlocks.push(match);
831
- if (match.endInOld < endInOld && match.endInNew < endInNew) {
832
- this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks);
833
- }
834
- }
835
- }
836
- findMatch(startInOld, endInOld, startInNew, endInNew) {
837
- for (let i = this.matchGranularity; i > 0; i--) {
838
- const options = {
839
- blockSize: i,
840
- repeatingWordsAccuracy: this.repeatingWordsAccuracy,
841
- ignoreWhitespaceDifferences: this.ignoreWhitespaceDifferences
842
- };
843
- const finder = new MatchFinder(this.oldWords, this.newWords, startInOld, endInOld, startInNew, endInNew, options);
844
- const match = finder.findMatch();
845
- if (match !== null) return match;
846
- }
847
- return null;
848
- }
397
+ //#endregion
398
+ //#region src/HtmlDiff.ts
399
+ var HtmlDiff = class HtmlDiff {
400
+ /**
401
+ * This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
402
+ * @private
403
+ */
404
+ static MatchGranularityMaximum = 4;
405
+ static DelTag = "del";
406
+ static InsTag = "ins";
407
+ static SpecialCaseClosingTags = [
408
+ "</strong>",
409
+ "</em>",
410
+ "</b>",
411
+ "</i>",
412
+ "</big>",
413
+ "</small>",
414
+ "</u>",
415
+ "</sub>",
416
+ "</sup>",
417
+ "</strike>",
418
+ "</s>",
419
+ "</span>"
420
+ ];
421
+ static SpecialCaseClosingTagsSet = new Set([
422
+ "</strong>",
423
+ "</em>",
424
+ "</b>",
425
+ "</i>",
426
+ "</big>",
427
+ "</small>",
428
+ "</u>",
429
+ "</sub>",
430
+ "</sup>",
431
+ "</strike>",
432
+ "</s>",
433
+ "</span>"
434
+ ]);
435
+ static SpecialCaseOpeningTagRegex = /<((strong)|(b)|(i)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s)|(span))[>\s]+/i;
436
+ static FormattingTags = new Set([
437
+ "strong",
438
+ "em",
439
+ "b",
440
+ "i",
441
+ "big",
442
+ "small",
443
+ "u",
444
+ "sub",
445
+ "sup",
446
+ "strike",
447
+ "s",
448
+ "span"
449
+ ]);
450
+ content = [];
451
+ newText;
452
+ oldText;
453
+ specialTagDiffStack = [];
454
+ newWords = [];
455
+ oldWords = [];
456
+ /**
457
+ * Content-only projections of oldWords/newWords (structural tags and adjacent whitespace removed).
458
+ * When null, no structural normalization is applied (the word arrays are identical for diffing).
459
+ */
460
+ oldContentWords = null;
461
+ newContentWords = null;
462
+ /** Maps content-word index → original word index */
463
+ oldContentToOriginal = null;
464
+ newContentToOriginal = null;
465
+ /** Tracks the index just past the last original old word already output, so equal operations can include leading structural tags */
466
+ lastOriginalOldOutputIndex = 0;
467
+ matchGranularity = 0;
468
+ blockExpressions = [];
469
+ /**
470
+ * Defines how to compare repeating words. Valid values are from 0 to 1.
471
+ * This value allows excluding some words from the comparison, which
471
+ * reduces the total time of the diff algorithm.
473
+ * 0 means that all words are excluded so the diff will not find any matching words at all.
474
+ * 1 (default value) means that all words participate in comparison so this is the most accurate case.
475
+ * 0.5 means that any word that occurs more than 50% of the time may be excluded from comparison. This doesn't
476
+ * mean that such words will definitely be excluded; it only gives permission to exclude them if necessary.
477
+ */
478
+ repeatingWordsAccuracy = 1;
479
+ /**
480
+ * If true, all whitespace characters are considered equal.
481
+ */
482
+ ignoreWhitespaceDifferences = false;
483
+ /**
484
+ * If some match is too small and located far from its neighbors then it is considered as orphan
485
+ * and removed. For example:
486
+ * <code>
487
+ * aaaaa bb ccccccccc dddddd ee
488
+ * 11111 bb 222222222 dddddd ee
489
+ * </code>
490
+ * will find two matches <code>bb</code> and <code>dddddd ee</code> but the first will be considered
491
+ * as orphan and ignored, as result it will consider texts <code>aaaaa bb ccccccccc</code> and
492
+ * <code>11111 bb 222222222</code> as single replacement:
493
+ * <code>
494
+ * &lt;del&gt;aaaaa bb ccccccccc&lt;/del&gt;&lt;ins&gt;11111 bb 222222222&lt;/ins&gt; dddddd ee
495
+ * </code>
496
+ * This property defines relative size of the match to be considered as orphan, from 0 to 1.
497
+ * 1 means that all matches will be considered as orphans.
498
+ * 0 (default) means that no match will be considered as orphan.
499
+ * 0.2 means that if match length is less than 20% of distance between its neighbors it is considered as orphan.
500
+ */
501
+ orphanMatchThreshold = 0;
502
+ /**
503
+ * Initializes a new instance of the class.
504
+ * @param oldText The old text.
505
+ * @param newText The new text.
506
+ */
507
+ constructor(oldText, newText) {
508
+ this.oldText = oldText;
509
+ this.newText = newText;
510
+ }
511
+ static execute(oldText, newText) {
512
+ return new HtmlDiff(oldText, newText).build();
513
+ }
514
+ /**
515
+ * Builds the HTML diff output
516
+ * @return HTML diff markup
517
+ */
518
+ build() {
519
+ if (this.oldText === this.newText) return this.newText;
520
+ this.splitInputsToWords();
521
+ this.buildContentProjections();
522
+ const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
523
+ const wordsForDiffNew = this.newContentWords ?? this.newWords;
524
+ this.matchGranularity = Math.min(HtmlDiff.MatchGranularityMaximum, Math.min(wordsForDiffOld.length, wordsForDiffNew.length));
525
+ const operations = this.operations();
526
+ for (const op of operations) this.performOperation(op);
527
+ return this.content.join("");
528
+ }
529
+ /**
530
+ * Uses {@link expression} to group text together so that any change detected within the group is treated as a single block
531
+ * @param expression
532
+ */
533
+ addBlockExpression(expression) {
534
+ this.blockExpressions.push(expression);
535
+ }
536
+ splitInputsToWords() {
537
+ this.oldWords = WordSplitter.convertHtmlToListOfWords(this.oldText, this.blockExpressions);
538
+ this.oldText = "";
539
+ this.newWords = WordSplitter.convertHtmlToListOfWords(this.newText, this.blockExpressions);
540
+ this.newText = "";
541
+ }
542
+ /**
543
+ * Checks whether the two word arrays differ in their sequence of wrapper tags (see hasStructuralDifferences).
544
+ * When they do, and both sides still contain content words, builds "content projections" that
545
+ * strip structural noise so the diff algorithm only sees meaningful content and formatting changes.
546
+ */
547
+ buildContentProjections() {
548
+ const oldProjection = HtmlDiff.createContentProjection(this.oldWords);
549
+ const newProjection = HtmlDiff.createContentProjection(this.newWords);
550
+ if (!HtmlDiff.hasStructuralDifferences(this.oldWords, this.newWords)) return;
551
+ if (oldProjection.contentWords.length === 0 || newProjection.contentWords.length === 0) return;
552
+ this.oldContentWords = oldProjection.contentWords;
553
+ this.oldContentToOriginal = oldProjection.contentToOriginal;
554
+ this.newContentWords = newProjection.contentWords;
555
+ this.newContentToOriginal = newProjection.contentToOriginal;
556
+ }
557
+ /**
558
+ * Tags that commonly serve as content wrappers and may change structurally
559
+ * without affecting the actual content. Only these tags are stripped during
560
+ * structural normalization.
561
+ */
562
+ static WrapperTags = new Set([
563
+ "div",
564
+ "p",
565
+ "section",
566
+ "article",
567
+ "main",
568
+ "header",
569
+ "footer",
570
+ "aside",
571
+ "nav"
572
+ ]);
573
+ static isStructuralTag(word) {
574
+ if (!Utils_default.isTag(word)) return false;
575
+ const tagName = Utils_default.getTagName(word);
576
+ return HtmlDiff.WrapperTags.has(tagName);
577
+ }
578
+ /**
579
+ * Returns true if the word at index is whitespace that is adjacent to a structural tag or sits at the very start or end of the word list.
580
+ */
581
+ static isStructuralWhitespace(words, index) {
582
+ if (!Utils_default.isWhiteSpace(words[index])) return false;
583
+ const prevIsStructural = index === 0 || HtmlDiff.isStructuralTag(words[index - 1]);
584
+ const nextIsStructural = index === words.length - 1 || HtmlDiff.isStructuralTag(words[index + 1]);
585
+ return prevIsStructural || nextIsStructural;
586
+ }
587
+ static createContentProjection(words) {
588
+ const contentWords = [];
589
+ const contentToOriginal = [];
590
+ for (let i = 0; i < words.length; i++) {
591
+ if (HtmlDiff.isStructuralTag(words[i])) continue;
592
+ if (HtmlDiff.isStructuralWhitespace(words, i)) continue;
593
+ contentWords.push(words[i]);
594
+ contentToOriginal.push(i);
595
+ }
596
+ return {
597
+ contentWords,
598
+ contentToOriginal
599
+ };
600
+ }
601
+ static hasStructuralDifferences(oldWords, newWords) {
602
+ const oldStructural = [];
603
+ const newStructural = [];
604
+ for (const w of oldWords) if (HtmlDiff.isStructuralTag(w)) oldStructural.push(Utils_default.stripTagAttributes(w));
605
+ for (const w of newWords) if (HtmlDiff.isStructuralTag(w)) newStructural.push(Utils_default.stripTagAttributes(w));
606
+ if (oldStructural.length !== newStructural.length) return true;
607
+ for (let i = 0; i < oldStructural.length; i++) if (oldStructural[i] !== newStructural[i]) return true;
608
+ return false;
609
+ }
610
+ performOperation(operation) {
611
+ switch (operation.action) {
612
+ case 0:
613
+ this.processEqualOperation(operation);
614
+ break;
615
+ case 1:
616
+ this.processDeleteOperation(operation, "diffdel");
617
+ break;
618
+ case 2:
619
+ this.processInsertOperation(operation, "diffins");
620
+ break;
621
+ case 3: break;
622
+ case 4:
623
+ this.processReplaceOperation(operation);
624
+ break;
625
+ }
626
+ }
627
+ processReplaceOperation(operation) {
628
+ this.processDeleteOperation(operation, "diffmod");
629
+ this.processInsertOperation(operation, "diffmod");
630
+ }
631
+ processInsertOperation(operation, cssClass) {
632
+ const words = this.oldContentWords ? this.getOriginalNewWords(operation.startInNew, operation.endInNew) : this.newWords.slice(operation.startInNew, operation.endInNew);
633
+ this.insertTag(HtmlDiff.InsTag, cssClass, words);
634
+ }
635
+ processDeleteOperation(operation, cssClass) {
636
+ const words = this.oldContentWords ? this.getOriginalOldWords(operation.startInOld, operation.endInOld) : this.oldWords.slice(operation.startInOld, operation.endInOld);
637
+ this.insertTag(HtmlDiff.DelTag, cssClass, words);
638
+ if (this.oldContentToOriginal && operation.endInOld > 0) {
639
+ const lastDeletedOrigIdx = this.oldContentToOriginal[operation.endInOld - 1];
640
+ this.lastOriginalOldOutputIndex = Math.max(this.lastOriginalOldOutputIndex, lastDeletedOrigIdx + 1);
641
+ }
642
+ }
643
+ processEqualOperation(operation) {
644
+ if (this.oldContentWords) {
645
+ const result = this.getOriginalOldWordsWithStructure(operation.startInOld, operation.endInOld);
646
+ this.content.push(result.join(""));
647
+ } else {
648
+ const result = this.newWords.slice(operation.startInNew, operation.endInNew);
649
+ this.content.push(result.join(""));
650
+ }
651
+ }
652
+ /**
653
+ * Gets original old words for a content-index range, including only content and formatting tags
654
+ * (used for delete/replace operations where we don't want structural tags).
655
+ */
656
+ getOriginalOldWords(contentStart, contentEnd) {
657
+ if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd);
658
+ const result = [];
659
+ for (let i = contentStart; i < contentEnd; i++) result.push(this.oldWords[this.oldContentToOriginal[i]]);
660
+ return result;
661
+ }
662
+ /**
663
+ * Gets original new words for a content-index range, including only content and formatting tags
664
+ * (used for insert/replace operations where we don't want structural tags).
665
+ */
666
+ getOriginalNewWords(contentStart, contentEnd) {
667
+ if (!this.newContentToOriginal) return this.newWords.slice(contentStart, contentEnd);
668
+ const result = [];
669
+ for (let i = contentStart; i < contentEnd; i++) result.push(this.newWords[this.newContentToOriginal[i]]);
670
+ return result;
671
+ }
672
+ /**
673
+ * Gets original old words for a content-index range, INCLUDING structural tags and whitespace
674
+ * between the content words (used for equal operations to preserve old HTML structure).
675
+ */
676
+ getOriginalOldWordsWithStructure(contentStart, contentEnd) {
677
+ if (!this.oldContentToOriginal) return this.oldWords.slice(contentStart, contentEnd);
678
+ if (contentStart >= contentEnd) return [];
679
+ const firstContentOrigIdx = this.oldContentToOriginal[contentStart];
680
+ const origStart = Math.min(this.lastOriginalOldOutputIndex, firstContentOrigIdx);
681
+ const origEnd = contentEnd < this.oldContentToOriginal.length ? this.oldContentToOriginal[contentEnd] : this.oldWords.length;
682
+ this.lastOriginalOldOutputIndex = origEnd;
683
+ return this.oldWords.slice(origStart, origEnd);
684
+ }
685
+ /**
686
+ * This method encloses words within a specified tag (ins or del), and adds this into "content",
687
+ * with a twist: if some of the words are tags, it actually creates multiple ins or del elements,
688
+ * so that they don't include any ins or del. This handles cases like
689
+ * old: '<p>a</p>'
690
+ * new: '<p>ab</p>
691
+ * <p>
692
+ * c</p>'
693
+ * diff result: '<p>a<ins>b</ins></p>
694
+ * <p>
695
+ * <ins>c</ins>
696
+ * </p>
697
+ * '
698
+ * this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
699
+ * del tags), but handles correctly more cases than the earlier version.
700
+ * P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
701
+ * @param tag
702
+ * @param cssClass
703
+ * @param words
704
+ * @private
705
+ */
706
+ insertTag(tag, cssClass, words) {
707
+ while (true) {
708
+ if (words.length === 0) break;
709
+ const allWordsUntilFirstTag = this.extractConsecutiveWords(words, (x) => !Utils_default.isTag(x));
710
+ if (allWordsUntilFirstTag.length > 0) {
711
+ const text = Utils_default.wrapText(allWordsUntilFirstTag.join(""), tag, cssClass);
712
+ this.content.push(text);
713
+ }
714
+ if (words.length === 0) break;
715
+ const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
716
+ const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
717
+ let specialCaseTagInjection = "";
718
+ let specialCaseTagInjectionIsBefore = false;
719
+ if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
720
+ const tagNames = /* @__PURE__ */ new Set();
721
+ for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
722
+ const styledTagNames = Array.from(tagNames).join(" ");
723
+ this.specialTagDiffStack.push(words[0]);
724
+ specialCaseTagInjection = `<ins class='mod ${styledTagNames}'>`;
725
+ if (tag === HtmlDiff.DelTag) {
726
+ words.shift();
727
+ while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
728
+ }
729
+ } else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
730
+ const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
731
+ const openingAndClosingTagsMatch = !!openingTag && Utils_default.getTagName(openingTag) === Utils_default.getTagName(words[indexLastTagInFirstTagBlock]);
732
+ if (!!openingTag && openingAndClosingTagsMatch) {
733
+ specialCaseTagInjection = "</ins>";
734
+ specialCaseTagInjectionIsBefore = true;
735
+ } else if (openingTag) this.specialTagDiffStack.push(openingTag);
736
+ if (tag === HtmlDiff.DelTag) {
737
+ words.shift();
738
+ while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
739
+ }
740
+ }
741
+ if (words.length === 0 && specialCaseTagInjection.length === 0) break;
742
+ if (specialCaseTagInjectionIsBefore) this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, Utils_default.isTag).join(""));
743
+ else this.content.push(this.extractConsecutiveWords(words, Utils_default.isTag).join("") + specialCaseTagInjection);
744
+ if (words.length === 0) continue;
745
+ this.insertTag(tag, cssClass, words);
746
+ break;
747
+ }
748
+ }
749
+ extractConsecutiveWords(words, condition) {
750
+ let indexOfFirstTag = null;
751
+ for (let i = 0; i < words.length; i++) {
752
+ const word = words[i];
753
+ if (i === 0 && word === " ") words[i] = "&nbsp;";
754
+ if (!condition(word)) {
755
+ indexOfFirstTag = i;
756
+ break;
757
+ }
758
+ }
759
+ if (indexOfFirstTag !== null) {
760
+ const items = words.slice(0, indexOfFirstTag);
761
+ if (indexOfFirstTag > 0) words.splice(0, indexOfFirstTag);
762
+ return items;
763
+ }
764
+ const items = words.slice(0);
765
+ words.splice(0, words.length);
766
+ return items;
767
+ }
768
+ operations() {
769
+ let positionInOld = 0;
770
+ let positionInNew = 0;
771
+ const operations = [];
772
+ const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
773
+ const wordsForDiffNew = this.newContentWords ?? this.newWords;
774
+ const matches = this.matchingBlocks();
775
+ matches.push(new Match(wordsForDiffOld.length, wordsForDiffNew.length, 0));
776
+ const matchesWithoutOrphans = this.removeOrphans(matches);
777
+ for (const match of matchesWithoutOrphans) {
778
+ const matchStartsAtCurrentPositionInOld = positionInOld === match.startInOld;
779
+ const matchStartsAtCurrentPositionInNew = positionInNew === match.startInNew;
780
+ let action;
781
+ if (!matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) action = 4;
782
+ else if (matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) action = 2;
783
+ else if (!matchStartsAtCurrentPositionInOld) action = 1;
784
+ else action = 3;
785
+ if (action !== 3) operations.push(new Operation(action, positionInOld, match.startInOld, positionInNew, match.startInNew));
786
+ if (match.size !== 0) operations.push(new Operation(0, match.startInOld, match.endInOld, match.startInNew, match.endInNew));
787
+ positionInOld = match.endInOld;
788
+ positionInNew = match.endInNew;
789
+ }
790
+ return operations;
791
+ }
792
+ *removeOrphans(matches) {
793
+ const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
794
+ const wordsForDiffNew = this.newContentWords ?? this.newWords;
795
+ let prev = new Match(0, 0, 0);
796
+ let curr = null;
797
+ for (const next of matches) {
798
+ if (curr === null) {
799
+ curr = next;
800
+ continue;
801
+ }
802
+ if (prev.endInOld === curr.startInOld && prev.endInNew === curr.startInNew || curr.endInOld === next.startInOld && curr.endInNew === next.startInNew) {
803
+ yield curr;
804
+ prev = curr;
805
+ curr = next;
806
+ continue;
807
+ }
808
+ let oldDistanceInChars = 0;
809
+ for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
810
+ let newDistanceInChars = 0;
811
+ for (let i = prev.endInNew; i < next.startInNew; i++) newDistanceInChars += wordsForDiffNew[i].length;
812
+ let currMatchLengthInChars = 0;
813
+ for (let i = curr.startInNew; i < curr.endInNew; i++) currMatchLengthInChars += wordsForDiffNew[i].length;
814
+ if (currMatchLengthInChars > Math.max(oldDistanceInChars, newDistanceInChars) * this.orphanMatchThreshold) yield curr;
815
+ prev = curr;
816
+ curr = next;
817
+ }
818
+ if (curr !== null) yield curr;
819
+ }
820
+ matchingBlocks() {
821
+ const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
822
+ const wordsForDiffNew = this.newContentWords ?? this.newWords;
823
+ const matchingBlocks = [];
824
+ this.findMatchingBlocks(0, wordsForDiffOld.length, 0, wordsForDiffNew.length, matchingBlocks);
825
+ return matchingBlocks;
826
+ }
827
+ findMatchingBlocks(startInOld, endInOld, startInNew, endInNew, matchingBlocks) {
828
+ const match = this.findMatch(startInOld, endInOld, startInNew, endInNew);
829
+ if (match !== null) {
830
+ if (startInOld < match.startInOld && startInNew < match.startInNew) this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks);
831
+ matchingBlocks.push(match);
832
+ if (match.endInOld < endInOld && match.endInNew < endInNew) this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks);
833
+ }
834
+ }
835
+ findMatch(startInOld, endInOld, startInNew, endInNew) {
836
+ const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
837
+ const wordsForDiffNew = this.newContentWords ?? this.newWords;
838
+ for (let i = this.matchGranularity; i > 0; i--) {
839
+ const match = new MatchFinder(wordsForDiffOld, wordsForDiffNew, startInOld, endInOld, startInNew, endInNew, {
840
+ blockSize: i,
841
+ repeatingWordsAccuracy: this.repeatingWordsAccuracy,
842
+ ignoreWhitespaceDifferences: this.ignoreWhitespaceDifferences
843
+ }).findMatch();
844
+ if (match !== null) return match;
845
+ }
846
+ return null;
847
+ }
849
848
  };
849
+ //#endregion
850
+ module.exports = HtmlDiff;
851
+
850
852
  //# sourceMappingURL=HtmlDiff.cjs.map