@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +249 -52
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +72 -18
- package/dist/HtmlDiff.d.mts +72 -18
- package/dist/HtmlDiff.mjs +244 -52
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +172 -48
- package/src/ThreeWayDiff.ts +58 -11
- package/src/ThreeWayTable.ts +143 -9
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +232 -6
- package/test/HtmlDiff.threeWay.tables.spec.ts +111 -1
- package/test/Utils.spec.ts +3 -3
package/dist/HtmlDiff.d.cts
CHANGED
|
@@ -63,6 +63,37 @@ interface AnalyzeResult {
|
|
|
63
63
|
* `evaluateProjectionApplicability` results.
|
|
64
64
|
*/
|
|
65
65
|
type ThreeWayOptions = AnalyzeOptions;
|
|
66
|
+
/**
|
|
67
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
68
|
+
* track-changes rendering for legal-document rewrites.
|
|
69
|
+
*
|
|
70
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
71
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
72
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
73
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
74
|
+
* dramatically more readable for legal text.
|
|
75
|
+
*
|
|
76
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
77
|
+
* Commercial One CP, May 2026):
|
|
78
|
+
* - short edits (typo / one-word insert): output identical to
|
|
79
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
80
|
+
* trivially clears the bar;
|
|
81
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
82
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
83
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
84
|
+
* Word's 1+1 and a major readability win;
|
|
85
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
86
|
+
* formatting changes into a single block — too aggressive.
|
|
87
|
+
*
|
|
88
|
+
* Consumers rendering legal documents should spread this into their
|
|
89
|
+
* options:
|
|
90
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
91
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
92
|
+
*
|
|
93
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
94
|
+
* keep the bare default.
|
|
95
|
+
*/
|
|
96
|
+
declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
|
|
66
97
|
declare class HtmlDiff {
|
|
67
98
|
/**
|
|
68
99
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -99,6 +130,16 @@ declare class HtmlDiff {
|
|
|
99
130
|
private newText;
|
|
100
131
|
private oldText;
|
|
101
132
|
private tablePreprocessDepth;
|
|
133
|
+
/**
|
|
134
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
135
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
136
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
137
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
138
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
139
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
140
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
141
|
+
* handler for the split logic.
|
|
142
|
+
*/
|
|
102
143
|
private specialTagDiffStack;
|
|
103
144
|
private newWords;
|
|
104
145
|
private oldWords;
|
|
@@ -163,7 +204,17 @@ declare class HtmlDiff {
|
|
|
163
204
|
* @param newText The new text.
|
|
164
205
|
*/
|
|
165
206
|
constructor(oldText: string, newText: string);
|
|
166
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
209
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
210
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
211
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
212
|
+
*
|
|
213
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
214
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
215
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
216
|
+
*/
|
|
217
|
+
static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
|
|
167
218
|
/**
|
|
168
219
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
169
220
|
* arrays the diff ran against, the operations produced, the original
|
|
@@ -192,22 +243,6 @@ declare class HtmlDiff {
|
|
|
192
243
|
* why symmetry matters.
|
|
193
244
|
*/
|
|
194
245
|
static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
|
|
195
|
-
/**
|
|
196
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
197
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
198
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
199
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
200
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
201
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
202
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
203
|
-
*
|
|
204
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
205
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
206
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
207
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
208
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
209
|
-
* override.
|
|
210
|
-
*/
|
|
211
246
|
/**
|
|
212
247
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
213
248
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
@@ -235,6 +270,25 @@ declare class HtmlDiff {
|
|
|
235
270
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
236
271
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
237
272
|
* opened in one segment and closed in another stays balanced.
|
|
273
|
+
*
|
|
274
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
275
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
276
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
277
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
278
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
279
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
280
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
281
|
+
* at the end of emission.
|
|
282
|
+
*
|
|
283
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
284
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
285
|
+
* postInject regardless of whether the outer segment is ins or
|
|
286
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
287
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
288
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
289
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
290
|
+
* output is usually acceptable — but the warning IS the signal
|
|
291
|
+
* that the input had a real imbalance worth investigating.
|
|
238
292
|
*/
|
|
239
293
|
private static emitSegments;
|
|
240
294
|
/**
|
|
@@ -338,5 +392,5 @@ declare class HtmlDiff {
|
|
|
338
392
|
private findMatch;
|
|
339
393
|
}
|
|
340
394
|
//#endregion
|
|
341
|
-
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
|
|
395
|
+
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
342
396
|
//# sourceMappingURL=HtmlDiff.d.cts.map
|
package/dist/HtmlDiff.d.mts
CHANGED
|
@@ -63,6 +63,37 @@ interface AnalyzeResult {
|
|
|
63
63
|
* `evaluateProjectionApplicability` results.
|
|
64
64
|
*/
|
|
65
65
|
type ThreeWayOptions = AnalyzeOptions;
|
|
66
|
+
/**
|
|
67
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
68
|
+
* track-changes rendering for legal-document rewrites.
|
|
69
|
+
*
|
|
70
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
71
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
72
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
73
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
74
|
+
* dramatically more readable for legal text.
|
|
75
|
+
*
|
|
76
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
77
|
+
* Commercial One CP, May 2026):
|
|
78
|
+
* - short edits (typo / one-word insert): output identical to
|
|
79
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
80
|
+
* trivially clears the bar;
|
|
81
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
82
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
83
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
84
|
+
* Word's 1+1 and a major readability win;
|
|
85
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
86
|
+
* formatting changes into a single block — too aggressive.
|
|
87
|
+
*
|
|
88
|
+
* Consumers rendering legal documents should spread this into their
|
|
89
|
+
* options:
|
|
90
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
91
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
92
|
+
*
|
|
93
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
94
|
+
* keep the bare default.
|
|
95
|
+
*/
|
|
96
|
+
declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
|
|
66
97
|
declare class HtmlDiff {
|
|
67
98
|
/**
|
|
68
99
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -99,6 +130,16 @@ declare class HtmlDiff {
|
|
|
99
130
|
private newText;
|
|
100
131
|
private oldText;
|
|
101
132
|
private tablePreprocessDepth;
|
|
133
|
+
/**
|
|
134
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
135
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
136
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
137
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
138
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
139
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
140
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
141
|
+
* handler for the split logic.
|
|
142
|
+
*/
|
|
102
143
|
private specialTagDiffStack;
|
|
103
144
|
private newWords;
|
|
104
145
|
private oldWords;
|
|
@@ -163,7 +204,17 @@ declare class HtmlDiff {
|
|
|
163
204
|
* @param newText The new text.
|
|
164
205
|
*/
|
|
165
206
|
constructor(oldText: string, newText: string);
|
|
166
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
209
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
210
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
211
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
212
|
+
*
|
|
213
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
214
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
215
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
216
|
+
*/
|
|
217
|
+
static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
|
|
167
218
|
/**
|
|
168
219
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
169
220
|
* arrays the diff ran against, the operations produced, the original
|
|
@@ -192,22 +243,6 @@ declare class HtmlDiff {
|
|
|
192
243
|
* why symmetry matters.
|
|
193
244
|
*/
|
|
194
245
|
static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
|
|
195
|
-
/**
|
|
196
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
197
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
198
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
199
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
200
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
201
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
202
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
203
|
-
*
|
|
204
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
205
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
206
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
207
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
208
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
209
|
-
* override.
|
|
210
|
-
*/
|
|
211
246
|
/**
|
|
212
247
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
213
248
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
@@ -235,6 +270,25 @@ declare class HtmlDiff {
|
|
|
235
270
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
236
271
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
237
272
|
* opened in one segment and closed in another stays balanced.
|
|
273
|
+
*
|
|
274
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
275
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
276
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
277
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
278
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
279
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
280
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
281
|
+
* at the end of emission.
|
|
282
|
+
*
|
|
283
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
284
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
285
|
+
* postInject regardless of whether the outer segment is ins or
|
|
286
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
287
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
288
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
289
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
290
|
+
* output is usually acceptable — but the warning IS the signal
|
|
291
|
+
* that the input had a real imbalance worth investigating.
|
|
238
292
|
*/
|
|
239
293
|
private static emitSegments;
|
|
240
294
|
/**
|
|
@@ -338,5 +392,5 @@ declare class HtmlDiff {
|
|
|
338
392
|
private findMatch;
|
|
339
393
|
}
|
|
340
394
|
//#endregion
|
|
341
|
-
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
|
|
395
|
+
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
342
396
|
//# sourceMappingURL=HtmlDiff.d.mts.map
|