@createiq/htmldiff 1.0.3 → 1.0.4-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/.gitlab-ci.yml +5 -5
- package/biome.json +3 -0
- package/dist/HtmlDiff.cjs +803 -810
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +149 -104
- package/dist/HtmlDiff.d.mts +152 -0
- package/dist/HtmlDiff.mjs +843 -0
- package/dist/HtmlDiff.mjs.map +1 -0
- package/mise.toml +1 -1
- package/package.json +21 -14
- package/src/HtmlDiff.ts +199 -13
- package/test/HtmlDiff.spec.ts +67 -27
- package/test/structural1.html +366 -0
- package/test/structural2.html +366 -0
- package/{tsup.config.ts → tsdown.config.ts} +1 -3
- package/vitest.config.mts +1 -1
- package/dist/HtmlDiff.d.ts +0 -106
- package/dist/HtmlDiff.js +0 -827
- package/dist/HtmlDiff.js.map +0 -1
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
//#region src/HtmlDiff.d.ts
|
|
2
|
+
declare class HtmlDiff {
|
|
3
|
+
/**
|
|
4
|
+
* This value defines the balance between speed and memory utilization. The higher it is, the faster the diff runs and the more memory it consumes.
|
|
5
|
+
* @private
|
|
6
|
+
*/
|
|
7
|
+
private static MatchGranularityMaximum;
|
|
8
|
+
private static DelTag;
|
|
9
|
+
private static InsTag;
|
|
10
|
+
private static SpecialCaseClosingTags;
|
|
11
|
+
private static SpecialCaseClosingTagsSet;
|
|
12
|
+
private static SpecialCaseOpeningTagRegex;
|
|
13
|
+
private static FormattingTags;
|
|
14
|
+
private content;
|
|
15
|
+
private newText;
|
|
16
|
+
private oldText;
|
|
17
|
+
private specialTagDiffStack;
|
|
18
|
+
private newWords;
|
|
19
|
+
private oldWords;
|
|
20
|
+
/**
|
|
21
|
+
* Content-only projections of oldWords/newWords (structural tags and adjacent whitespace removed).
|
|
22
|
+
* When null, no structural normalization is applied (the word arrays are identical for diffing).
|
|
23
|
+
*/
|
|
24
|
+
private oldContentWords;
|
|
25
|
+
private newContentWords;
|
|
26
|
+
/** Maps content-word index → original word index */
|
|
27
|
+
private oldContentToOriginal;
|
|
28
|
+
private newContentToOriginal;
|
|
29
|
+
private matchGranularity;
|
|
30
|
+
private blockExpressions;
|
|
31
|
+
/**
|
|
32
|
+
* Defines how to compare repeating words. Valid values are from 0 to 1.
|
|
33
|
+
* This value allows some words to be excluded from comparison, which eventually
|
|
34
|
+
* reduces the total time of the diff algorithm.
|
|
35
|
+
* 0 means that all words are excluded so the diff will not find any matching words at all.
|
|
36
|
+
* 1 (default value) means that all words participate in comparison so this is the most accurate case.
|
|
37
|
+
* 0.5 means that any word that occurs more than 50% of the time may be excluded from comparison. This doesn't
|
|
38
|
+
* mean that such words will definitely be excluded; it only gives permission to exclude them if necessary.
|
|
39
|
+
*/
|
|
40
|
+
repeatingWordsAccuracy: number;
|
|
41
|
+
/**
|
|
42
|
+
* If true, all whitespace characters are considered equal.
|
|
43
|
+
*/
|
|
44
|
+
ignoreWhitespaceDifferences: boolean;
|
|
45
|
+
/**
|
|
46
|
+
* If a match is too small and located far from its neighbors, it is considered an orphan
|
|
47
|
+
* and removed. For example:
|
|
48
|
+
* <code>
|
|
49
|
+
* aaaaa bb ccccccccc dddddd ee
|
|
50
|
+
* 11111 bb 222222222 dddddd ee
|
|
51
|
+
* </code>
|
|
52
|
+
* will find two matches <code>bb</code> and <code>dddddd ee</code> but the first will be considered
|
|
53
|
+
* as an orphan and ignored; as a result, it will consider the texts <code>aaaaa bb ccccccccc</code> and
|
|
54
|
+
* <code>11111 bb 222222222</code> as a single replacement:
|
|
55
|
+
* <code>
|
|
56
|
+
* <del>aaaaa bb ccccccccc</del><ins>11111 bb 222222222</ins> dddddd ee
|
|
57
|
+
* </code>
|
|
58
|
+
* This property defines the relative size of a match for it to be considered an orphan, from 0 to 1.
|
|
59
|
+
* 1 means that all matches will be considered as orphans.
|
|
60
|
+
* 0 (default) means that no match will be considered as orphan.
|
|
61
|
+
* 0.2 means that if the match length is less than 20% of the distance between its neighbors, it is considered an orphan.
|
|
62
|
+
*/
|
|
63
|
+
orphanMatchThreshold: number;
|
|
64
|
+
/**
|
|
65
|
+
* Initializes a new instance of the class.
|
|
66
|
+
* @param oldText The old text.
|
|
67
|
+
* @param newText The new text.
|
|
68
|
+
*/
|
|
69
|
+
constructor(oldText: string, newText: string);
|
|
70
|
+
static execute(oldText: string, newText: string): string;
|
|
71
|
+
/**
|
|
72
|
+
* Builds the HTML diff output
|
|
73
|
+
* @return HTML diff markup
|
|
74
|
+
*/
|
|
75
|
+
build(): string;
|
|
76
|
+
/**
|
|
77
|
+
* Uses {@link expression} to group text together so that any change detected within the group is treated as a single block
|
|
78
|
+
* @param expression
|
|
79
|
+
*/
|
|
80
|
+
addBlockExpression(expression: RegExp): void;
|
|
81
|
+
private splitInputsToWords;
|
|
82
|
+
/**
|
|
83
|
+
* Checks whether the two word arrays have structural HTML differences (different non-formatting tags
|
|
84
|
+
* or different whitespace between structural tags). When they do, builds "content projections" that
|
|
85
|
+
* strip structural noise so the diff algorithm only sees meaningful content and formatting changes.
|
|
86
|
+
*/
|
|
87
|
+
private buildContentProjections;
|
|
88
|
+
/**
|
|
89
|
+
* Tags that commonly serve as content wrappers and may change structurally
|
|
90
|
+
* without affecting the actual content. Only these tags are stripped during
|
|
91
|
+
* structural normalization.
|
|
92
|
+
*/
|
|
93
|
+
private static WrapperTags;
|
|
94
|
+
private static isStructuralTag;
|
|
95
|
+
/**
|
|
96
|
+
* Returns true if words between structural tags are just whitespace (indentation).
|
|
97
|
+
*/
|
|
98
|
+
private static isStructuralWhitespace;
|
|
99
|
+
private static createContentProjection;
|
|
100
|
+
private static hasStructuralDifferences;
|
|
101
|
+
private performOperation;
|
|
102
|
+
private processReplaceOperation;
|
|
103
|
+
private processInsertOperation;
|
|
104
|
+
private processDeleteOperation;
|
|
105
|
+
private processEqualOperation;
|
|
106
|
+
/**
|
|
107
|
+
* Gets original old words for a content-index range, including only content and formatting tags
|
|
108
|
+
* (used for delete/replace operations where we don't want structural tags).
|
|
109
|
+
*/
|
|
110
|
+
private getOriginalOldWords;
|
|
111
|
+
/**
|
|
112
|
+
* Gets original new words for a content-index range, including only content and formatting tags
|
|
113
|
+
* (used for insert/replace operations where we don't want structural tags).
|
|
114
|
+
*/
|
|
115
|
+
private getOriginalNewWords;
|
|
116
|
+
/**
|
|
117
|
+
* Gets original old words for a content-index range, INCLUDING structural tags and whitespace
|
|
118
|
+
* between the content words (used for equal operations to preserve old HTML structure).
|
|
119
|
+
*/
|
|
120
|
+
private getOriginalOldWordsWithStructure;
|
|
121
|
+
/**
|
|
122
|
+
* This method encloses words within a specified tag (ins or del), and adds this into "content",
|
|
123
|
+
* with a twist: if the words contain tags, it actually creates multiple ins or del,
|
|
124
|
+
* so that they don't include any ins or del. This handles cases like
|
|
125
|
+
* old: '<p>a</p>'
|
|
126
|
+
* new: '<p>ab</p>
|
|
127
|
+
* <p>
|
|
128
|
+
* c</p>'
|
|
129
|
+
* diff result: '<p>a<ins>b</ins></p>
|
|
130
|
+
* <p>
|
|
131
|
+
* <ins>c</ins>
|
|
132
|
+
* </p>
|
|
133
|
+
* '
|
|
134
|
+
* this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
|
|
135
|
+
* del tags), but handles correctly more cases than the earlier version.
|
|
136
|
+
* P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
|
|
137
|
+
* @param tag
|
|
138
|
+
* @param cssClass
|
|
139
|
+
* @param words
|
|
140
|
+
* @private
|
|
141
|
+
*/
|
|
142
|
+
private insertTag;
|
|
143
|
+
private extractConsecutiveWords;
|
|
144
|
+
private operations;
|
|
145
|
+
private removeOrphans;
|
|
146
|
+
private matchingBlocks;
|
|
147
|
+
private findMatchingBlocks;
|
|
148
|
+
private findMatch;
|
|
149
|
+
}
|
|
150
|
+
//#endregion
|
|
151
|
+
export { HtmlDiff as default };
|
|
152
|
+
//# sourceMappingURL=HtmlDiff.d.mts.map
|