@createiq/htmldiff 1.0.4 → 1.0.5-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/dist/HtmlDiff.cjs +881 -46
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +55 -19
- package/dist/HtmlDiff.d.mts +55 -19
- package/dist/HtmlDiff.mjs +881 -46
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +8 -8
- package/src/HtmlDiff.ts +156 -69
- package/src/TableDiff.ts +1196 -0
- package/test/HtmlDiff.spec.ts +119 -1
- package/test/HtmlDiff.tables.spec.ts +1419 -0
- package/test/TableDiff.bench.ts +244 -0
package/dist/HtmlDiff.d.mts
CHANGED
|
@@ -11,9 +11,20 @@ declare class HtmlDiff {
|
|
|
11
11
|
private static SpecialCaseClosingTagsSet;
|
|
12
12
|
private static SpecialCaseOpeningTagRegex;
|
|
13
13
|
private static FormattingTags;
|
|
14
|
+
/**
|
|
15
|
+
* Hard cap on nested `HtmlDiff.execute` calls (table preprocessing
|
|
16
|
+
* recurses through `diffCell` for cell content). Each level allocates
|
|
17
|
+
* fresh DP matrices and word arrays; without a guard a maliciously
|
|
18
|
+
* nested table-in-cell-in-table-in-cell input could blow stack and
|
|
19
|
+
* memory. Set high enough to comfortably handle real legal documents
|
|
20
|
+
* (tables nested 2-3 deep at most), low enough to short-circuit
|
|
21
|
+
* pathological input.
|
|
22
|
+
*/
|
|
23
|
+
private static MaxTablePreprocessDepth;
|
|
14
24
|
private content;
|
|
15
25
|
private newText;
|
|
16
26
|
private oldText;
|
|
27
|
+
private readonly tablePreprocessDepth;
|
|
17
28
|
private specialTagDiffStack;
|
|
18
29
|
private newWords;
|
|
19
30
|
private oldWords;
|
|
@@ -26,8 +37,17 @@ declare class HtmlDiff {
|
|
|
26
37
|
/** Maps content-word index → original word index */
|
|
27
38
|
private oldContentToOriginal;
|
|
28
39
|
private newContentToOriginal;
|
|
29
|
-
/**
|
|
40
|
+
/**
|
|
41
|
+
* Tracks the next unwritten word index in oldWords/newWords. Mutated only by
|
|
42
|
+
* {@link sliceOriginalWordsForOp} (each op reads a slice and advances its cursor).
|
|
43
|
+
* Advances monotonically. Used so:
|
|
44
|
+
* - subsequent equal/delete ops know where in old to resume from
|
|
45
|
+
* - subsequent insert ops know where in new to resume from
|
|
46
|
+
* The two cursors are independent: equal/delete output from old and advance the old
|
|
47
|
+
* cursor; insert outputs from new and advances the new cursor.
|
|
48
|
+
*/
|
|
30
49
|
private lastOriginalOldOutputIndex;
|
|
50
|
+
private lastOriginalNewOutputIndex;
|
|
31
51
|
private matchGranularity;
|
|
32
52
|
private blockExpressions;
|
|
33
53
|
/**
|
|
@@ -67,9 +87,12 @@ declare class HtmlDiff {
|
|
|
67
87
|
* Initializes a new instance of the class.
|
|
68
88
|
* @param oldText The old text.
|
|
69
89
|
* @param newText The new text.
|
|
90
|
+
* @param tablePreprocessDepth Internal: nested-call depth for table
|
|
91
|
+
* preprocessing. Callers should leave at default (0); the recursive
|
|
92
|
+
* `diffCell` callback in TableDiff bumps it.
|
|
70
93
|
*/
|
|
71
|
-
constructor(oldText: string, newText: string);
|
|
72
|
-
static execute(oldText: string, newText: string): string;
|
|
94
|
+
constructor(oldText: string, newText: string, tablePreprocessDepth?: number);
|
|
95
|
+
static execute(oldText: string, newText: string, tablePreprocessDepth?: number): string;
|
|
73
96
|
/**
|
|
74
97
|
* Builds the HTML diff output
|
|
75
98
|
* @return HTML diff markup
|
|
@@ -82,11 +105,18 @@ declare class HtmlDiff {
|
|
|
82
105
|
addBlockExpression(expression: RegExp): void;
|
|
83
106
|
private splitInputsToWords;
|
|
84
107
|
/**
|
|
85
|
-
*
|
|
86
|
-
*
|
|
87
|
-
*
|
|
108
|
+
* Builds "content projections" — word arrays with structural wrapper tags stripped — when
|
|
109
|
+
* structural normalization is appropriate for these inputs. The diff algorithm operates on
|
|
110
|
+
* the projections so wrapper-tag differences (e.g. `<p>` vs `<div>`) don't appear as content
|
|
111
|
+
* changes; structural tags are then folded back in at output time.
|
|
88
112
|
*/
|
|
89
113
|
private buildContentProjections;
|
|
114
|
+
/**
|
|
115
|
+
* Decides whether structural normalization should be activated for this pair of inputs.
|
|
116
|
+
* Each clause is a distinct correctness or fitness check — extend by adding a named
|
|
117
|
+
* sub-predicate rather than chaining ad-hoc conditions.
|
|
118
|
+
*/
|
|
119
|
+
private static shouldUseContentProjections;
|
|
90
120
|
/**
|
|
91
121
|
* Tags that commonly serve as content wrappers and may change structurally
|
|
92
122
|
* without affecting the actual content. Only these tags are stripped during
|
|
@@ -94,6 +124,8 @@ declare class HtmlDiff {
|
|
|
94
124
|
*/
|
|
95
125
|
private static WrapperTags;
|
|
96
126
|
private static isStructuralTag;
|
|
127
|
+
/** True when the word is a structural opening tag (e.g. `<p>`, `<div>`). */
|
|
128
|
+
private static isOpeningStructuralTag;
|
|
97
129
|
/**
|
|
98
130
|
* Returns true if words between structural tags are just whitespace (indentation).
|
|
99
131
|
*/
|
|
@@ -105,21 +137,25 @@ declare class HtmlDiff {
|
|
|
105
137
|
private processInsertOperation;
|
|
106
138
|
private processDeleteOperation;
|
|
107
139
|
private processEqualOperation;
|
|
140
|
+
/** True when content projections are active for both sides — i.e. structural normalization is in effect. */
|
|
141
|
+
private usingContentProjections;
|
|
108
142
|
/**
|
|
109
|
-
*
|
|
110
|
-
*
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
*
|
|
115
|
-
*
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
*
|
|
120
|
-
*
|
|
143
|
+
* Returns the slice of original (old or new) words covering a content-index range,
|
|
144
|
+
* including the structural tags that surround the content. Advances the side's cursor
|
|
145
|
+
* past the slice so the next op resumes correctly.
|
|
146
|
+
*
|
|
147
|
+
* The slice extends:
|
|
148
|
+
* - LEADING: from the side's cursor (or the first content word's original index,
|
|
149
|
+
* whichever is smaller) so structural tags that precede the first content word
|
|
150
|
+
* are picked up by this op rather than left orphaned.
|
|
151
|
+
* - TRAILING (non-last range): from just after the last content word, including
|
|
152
|
+
* closing structural tags that close *this* op's paragraphs, but stopping at
|
|
153
|
+
* the first opening structural tag — that opening tag belongs to the next
|
|
154
|
+
* op's paragraph and would otherwise be emitted twice.
|
|
155
|
+
* - TRAILING (last range): all the way to the end of words, since there is no next
|
|
156
|
+
* op to claim the trailing tags.
|
|
121
157
|
*/
|
|
122
|
-
private
|
|
158
|
+
private sliceOriginalWordsForOp;
|
|
123
159
|
/**
|
|
124
160
|
* This method encloses words within a specified tag (ins or del), and adds this into "content",
|
|
125
161
|
* with a twist: if any of the words contain tags, it actually creates multiple ins or del,
|