@createiq/htmldiff 1.0.5 → 1.1.0
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +1038 -5
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +16 -2
- package/dist/HtmlDiff.d.mts +16 -2
- package/dist/HtmlDiff.mjs +1038 -5
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +5 -5
- package/src/HtmlDiff.ts +51 -5
- package/src/TableDiff.ts +1508 -0
- package/test/HtmlDiff.spec.ts +1 -1
- package/test/HtmlDiff.tables.matrix.spec.ts +372 -0
- package/test/HtmlDiff.tables.spec.ts +2069 -0
- package/test/TableDiff.bench.ts +244 -0
package/src/HtmlDiff.ts
CHANGED
|
@@ -2,6 +2,7 @@ import Action from './Action'
|
|
|
2
2
|
import Match from './Match'
|
|
3
3
|
import MatchFinder from './MatchFinder'
|
|
4
4
|
import Operation from './Operation'
|
|
5
|
+
import { preprocessTables, restoreTablePlaceholders } from './TableDiff'
|
|
5
6
|
import Utils from './Utils'
|
|
6
7
|
import WordSplitter from './WordSplitter'
|
|
7
8
|
|
|
@@ -64,9 +65,21 @@ export default class HtmlDiff {
|
|
|
64
65
|
'span',
|
|
65
66
|
])
|
|
66
67
|
|
|
68
|
+
/**
|
|
69
|
+
* Hard cap on nested `HtmlDiff.execute` calls (table preprocessing
|
|
70
|
+
* recurses through `diffCell` for cell content). Each level allocates
|
|
71
|
+
* fresh DP matrices and word arrays; without a guard a maliciously
|
|
72
|
+
* nested table-in-cell-in-table-in-cell input could exhaust the stack and
|
|
73
|
+
* memory. Set high enough to comfortably handle real legal documents
|
|
74
|
+
* (tables nested 2-3 deep at most), low enough to short-circuit
|
|
75
|
+
* pathological input.
|
|
76
|
+
*/
|
|
77
|
+
private static MaxTablePreprocessDepth = 8
|
|
78
|
+
|
|
67
79
|
private content: string[] = []
|
|
68
80
|
private newText: string
|
|
69
81
|
private oldText: string
|
|
82
|
+
private readonly tablePreprocessDepth: number
|
|
70
83
|
|
|
71
84
|
private specialTagDiffStack: string[] = []
|
|
72
85
|
private newWords: string[] = []
|
|
@@ -134,14 +147,18 @@ export default class HtmlDiff {
|
|
|
134
147
|
* Initializes a new instance of the class.
|
|
135
148
|
* @param oldText The old text.
|
|
136
149
|
* @param newText The new text.
|
|
150
|
+
* @param tablePreprocessDepth Internal: nested-call depth for table
|
|
151
|
+
* preprocessing. Callers should leave at default (0); the recursive
|
|
152
|
+
* `diffCell` callback in TableDiff bumps it.
|
|
137
153
|
*/
|
|
138
|
-
constructor(oldText: string, newText: string) {
|
|
154
|
+
constructor(oldText: string, newText: string, tablePreprocessDepth = 0) {
|
|
139
155
|
this.oldText = oldText
|
|
140
156
|
this.newText = newText
|
|
157
|
+
this.tablePreprocessDepth = tablePreprocessDepth
|
|
141
158
|
}
|
|
142
159
|
|
|
143
|
-
static execute(oldText: string, newText: string) {
|
|
144
|
-
return new HtmlDiff(oldText, newText).build()
|
|
160
|
+
static execute(oldText: string, newText: string, tablePreprocessDepth = 0) {
|
|
161
|
+
return new HtmlDiff(oldText, newText, tablePreprocessDepth).build()
|
|
145
162
|
}
|
|
146
163
|
|
|
147
164
|
/**
|
|
@@ -154,6 +171,34 @@ export default class HtmlDiff {
|
|
|
154
171
|
return this.newText
|
|
155
172
|
}
|
|
156
173
|
|
|
174
|
+
// Table preprocessing: when both sides have matching `<table>` structures,
|
|
175
|
+
// diff cells positionally so cross-cell content shifts produce one
|
|
176
|
+
// independent del/ins per cell rather than cell-misaligned output.
|
|
177
|
+
// Recursion guarded by MaxTablePreprocessDepth to bound work on
|
|
178
|
+
// deeply-nested table-in-cell-in-table inputs. Caller-configured
|
|
179
|
+
// settings (block expressions, accuracy thresholds) are propagated to
|
|
180
|
+
// the recursive cell diff so cell-level output is consistent with the
|
|
181
|
+
// top-level configuration.
|
|
182
|
+
const blockExpressions = this.blockExpressions
|
|
183
|
+
const repeatingWordsAccuracy = this.repeatingWordsAccuracy
|
|
184
|
+
const orphanMatchThreshold = this.orphanMatchThreshold
|
|
185
|
+
const ignoreWhitespaceDifferences = this.ignoreWhitespaceDifferences
|
|
186
|
+
const tablePreprocess =
|
|
187
|
+
this.tablePreprocessDepth >= HtmlDiff.MaxTablePreprocessDepth
|
|
188
|
+
? null
|
|
189
|
+
: preprocessTables(this.oldText, this.newText, (oldCell, newCell) => {
|
|
190
|
+
const inner = new HtmlDiff(oldCell, newCell, this.tablePreprocessDepth + 1)
|
|
191
|
+
for (const expr of blockExpressions) inner.addBlockExpression(expr)
|
|
192
|
+
inner.repeatingWordsAccuracy = repeatingWordsAccuracy
|
|
193
|
+
inner.orphanMatchThreshold = orphanMatchThreshold
|
|
194
|
+
inner.ignoreWhitespaceDifferences = ignoreWhitespaceDifferences
|
|
195
|
+
return inner.build()
|
|
196
|
+
})
|
|
197
|
+
if (tablePreprocess) {
|
|
198
|
+
this.oldText = tablePreprocess.modifiedOld
|
|
199
|
+
this.newText = tablePreprocess.modifiedNew
|
|
200
|
+
}
|
|
201
|
+
|
|
157
202
|
this.splitInputsToWords()
|
|
158
203
|
this.buildContentProjections()
|
|
159
204
|
|
|
@@ -170,7 +215,8 @@ export default class HtmlDiff {
|
|
|
170
215
|
this.performOperation(op)
|
|
171
216
|
}
|
|
172
217
|
|
|
173
|
-
|
|
218
|
+
const result = this.content.join('')
|
|
219
|
+
return tablePreprocess ? restoreTablePlaceholders(result, tablePreprocess.placeholderToDiff) : result
|
|
174
220
|
}
|
|
175
221
|
|
|
176
222
|
/**
|
|
@@ -517,7 +563,7 @@ export default class HtmlDiff {
|
|
|
517
563
|
const openingAndClosingTagsMatch =
|
|
518
564
|
!!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])
|
|
519
565
|
|
|
520
|
-
if (
|
|
566
|
+
if (openingTag && openingAndClosingTagsMatch) {
|
|
521
567
|
specialCaseTagInjection = '</ins>'
|
|
522
568
|
specialCaseTagInjectionIsBefore = true
|
|
523
569
|
}
|