@createiq/htmldiff 1.2.0-beta.6 → 1.2.0-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +58 -4
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +53 -4
- package/dist/HtmlDiff.d.mts +53 -4
- package/dist/HtmlDiff.mjs +53 -4
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +58 -5
- package/src/ThreeWayTable.ts +3 -0
- package/test/HtmlDiff.threeWay.spec.ts +55 -7
package/dist/HtmlDiff.d.cts
CHANGED
|
@@ -63,6 +63,37 @@ interface AnalyzeResult {
|
|
|
63
63
|
* `evaluateProjectionApplicability` results.
|
|
64
64
|
*/
|
|
65
65
|
type ThreeWayOptions = AnalyzeOptions;
|
|
66
|
+
/**
|
|
67
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
68
|
+
* track-changes rendering for legal-document rewrites.
|
|
69
|
+
*
|
|
70
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
71
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
72
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
73
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
74
|
+
* dramatically more readable for legal text.
|
|
75
|
+
*
|
|
76
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
77
|
+
* Commercial One CP, May 2026):
|
|
78
|
+
* - short edits (typo / one-word insert): output identical to
|
|
79
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
80
|
+
* trivially clears the bar;
|
|
81
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
82
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
83
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
84
|
+
* Word's 1+1 and a major readability win;
|
|
85
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
86
|
+
* formatting changes into a single block — too aggressive.
|
|
87
|
+
*
|
|
88
|
+
* Consumers rendering legal documents should spread this into their
|
|
89
|
+
* options:
|
|
90
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
91
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
92
|
+
*
|
|
93
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
94
|
+
* keep the bare default.
|
|
95
|
+
*/
|
|
96
|
+
declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
|
|
66
97
|
declare class HtmlDiff {
|
|
67
98
|
/**
|
|
68
99
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -173,6 +204,16 @@ declare class HtmlDiff {
|
|
|
173
204
|
* @param newText The new text.
|
|
174
205
|
*/
|
|
175
206
|
constructor(oldText: string, newText: string);
|
|
207
|
+
/**
|
|
208
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
209
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
210
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
211
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
212
|
+
*
|
|
213
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
214
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
215
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
216
|
+
*/
|
|
176
217
|
static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
|
|
177
218
|
/**
|
|
178
219
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
@@ -237,9 +278,17 @@ declare class HtmlDiff {
|
|
|
237
278
|
* `insertTag` and push raw, so the stack entry for the open is
|
|
238
279
|
* never popped. Rather than throw — which forces the caller's UI
|
|
239
280
|
* into an error boundary — close every leftover wrap with `</ins>`
|
|
240
|
-
* at the end of emission.
|
|
241
|
-
*
|
|
242
|
-
*
|
|
281
|
+
* at the end of emission.
|
|
282
|
+
*
|
|
283
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
284
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
285
|
+
* postInject regardless of whether the outer segment is ins or
|
|
286
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
287
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
288
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
289
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
290
|
+
* output is usually acceptable — but the warning IS the signal
|
|
291
|
+
* that the input had a real imbalance worth investigating.
|
|
243
292
|
*/
|
|
244
293
|
private static emitSegments;
|
|
245
294
|
/**
|
|
@@ -343,5 +392,5 @@ declare class HtmlDiff {
|
|
|
343
392
|
private findMatch;
|
|
344
393
|
}
|
|
345
394
|
//#endregion
|
|
346
|
-
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
|
|
395
|
+
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
347
396
|
//# sourceMappingURL=HtmlDiff.d.cts.map
|
package/dist/HtmlDiff.d.mts
CHANGED
|
@@ -63,6 +63,37 @@ interface AnalyzeResult {
|
|
|
63
63
|
* `evaluateProjectionApplicability` results.
|
|
64
64
|
*/
|
|
65
65
|
type ThreeWayOptions = AnalyzeOptions;
|
|
66
|
+
/**
|
|
67
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
68
|
+
* track-changes rendering for legal-document rewrites.
|
|
69
|
+
*
|
|
70
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
71
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
72
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
73
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
74
|
+
* dramatically more readable for legal text.
|
|
75
|
+
*
|
|
76
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
77
|
+
* Commercial One CP, May 2026):
|
|
78
|
+
* - short edits (typo / one-word insert): output identical to
|
|
79
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
80
|
+
* trivially clears the bar;
|
|
81
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
82
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
83
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
84
|
+
* Word's 1+1 and a major readability win;
|
|
85
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
86
|
+
* formatting changes into a single block — too aggressive.
|
|
87
|
+
*
|
|
88
|
+
* Consumers rendering legal documents should spread this into their
|
|
89
|
+
* options:
|
|
90
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
91
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
92
|
+
*
|
|
93
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
94
|
+
* keep the bare default.
|
|
95
|
+
*/
|
|
96
|
+
declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
|
|
66
97
|
declare class HtmlDiff {
|
|
67
98
|
/**
|
|
68
99
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -173,6 +204,16 @@ declare class HtmlDiff {
|
|
|
173
204
|
* @param newText The new text.
|
|
174
205
|
*/
|
|
175
206
|
constructor(oldText: string, newText: string);
|
|
207
|
+
/**
|
|
208
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
209
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
210
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
211
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
212
|
+
*
|
|
213
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
214
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
215
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
216
|
+
*/
|
|
176
217
|
static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
|
|
177
218
|
/**
|
|
178
219
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
@@ -237,9 +278,17 @@ declare class HtmlDiff {
|
|
|
237
278
|
* `insertTag` and push raw, so the stack entry for the open is
|
|
238
279
|
* never popped. Rather than throw — which forces the caller's UI
|
|
239
280
|
* into an error boundary — close every leftover wrap with `</ins>`
|
|
240
|
-
* at the end of emission.
|
|
241
|
-
*
|
|
242
|
-
*
|
|
281
|
+
* at the end of emission.
|
|
282
|
+
*
|
|
283
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
284
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
285
|
+
* postInject regardless of whether the outer segment is ins or
|
|
286
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
287
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
288
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
289
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
290
|
+
* output is usually acceptable — but the warning IS the signal
|
|
291
|
+
* that the input had a real imbalance worth investigating.
|
|
243
292
|
*/
|
|
244
293
|
private static emitSegments;
|
|
245
294
|
/**
|
|
@@ -343,5 +392,5 @@ declare class HtmlDiff {
|
|
|
343
392
|
private findMatch;
|
|
344
393
|
}
|
|
345
394
|
//#endregion
|
|
346
|
-
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
|
|
395
|
+
export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
347
396
|
//# sourceMappingURL=HtmlDiff.d.mts.map
|
package/dist/HtmlDiff.mjs
CHANGED
|
@@ -2035,6 +2035,37 @@ var BlockFinder = class {
|
|
|
2035
2035
|
};
|
|
2036
2036
|
//#endregion
|
|
2037
2037
|
//#region src/HtmlDiff.ts
|
|
2038
|
+
/**
|
|
2039
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
2040
|
+
* track-changes rendering for legal-document rewrites.
|
|
2041
|
+
*
|
|
2042
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
2043
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
2044
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
2045
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
2046
|
+
* dramatically more readable for legal text.
|
|
2047
|
+
*
|
|
2048
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
2049
|
+
* Commercial One CP, May 2026):
|
|
2050
|
+
* - short edits (typo / one-word insert): output identical to
|
|
2051
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
2052
|
+
* trivially clears the bar;
|
|
2053
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
2054
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
2055
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
2056
|
+
* Word's 1+1 and a major readability win;
|
|
2057
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
2058
|
+
* formatting changes into a single block — too aggressive.
|
|
2059
|
+
*
|
|
2060
|
+
* Consumers rendering legal documents should spread this into their
|
|
2061
|
+
* options:
|
|
2062
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
2063
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
2064
|
+
*
|
|
2065
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
2066
|
+
* keep the bare default.
|
|
2067
|
+
*/
|
|
2068
|
+
const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
|
|
2038
2069
|
var HtmlDiff = class HtmlDiff {
|
|
2039
2070
|
/**
|
|
2040
2071
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -2187,6 +2218,16 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2187
2218
|
this.oldText = oldText;
|
|
2188
2219
|
this.newText = newText;
|
|
2189
2220
|
}
|
|
2221
|
+
/**
|
|
2222
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
2223
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
2224
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
2225
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
2226
|
+
*
|
|
2227
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
2228
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
2229
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
2230
|
+
*/
|
|
2190
2231
|
static execute(oldText, newText, options = {}) {
|
|
2191
2232
|
const inner = new HtmlDiff(oldText, newText);
|
|
2192
2233
|
if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
|
|
@@ -2316,9 +2357,17 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2316
2357
|
* `insertTag` and push raw, so the stack entry for the open is
|
|
2317
2358
|
* never popped. Rather than throw — which forces the caller's UI
|
|
2318
2359
|
* into an error boundary — close every leftover wrap with `</ins>`
|
|
2319
|
-
* at the end of emission.
|
|
2320
|
-
*
|
|
2321
|
-
*
|
|
2360
|
+
* at the end of emission.
|
|
2361
|
+
*
|
|
2362
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
2363
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
2364
|
+
* postInject regardless of whether the outer segment is ins or
|
|
2365
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
2366
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
2367
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
2368
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
2369
|
+
* output is usually acceptable — but the warning IS the signal
|
|
2370
|
+
* that the input had a real imbalance worth investigating.
|
|
2322
2371
|
*/
|
|
2323
2372
|
static emitSegments(segments) {
|
|
2324
2373
|
const emitter = new HtmlDiff("", "");
|
|
@@ -2744,6 +2793,6 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2744
2793
|
}
|
|
2745
2794
|
};
|
|
2746
2795
|
//#endregion
|
|
2747
|
-
export { HtmlDiff as default };
|
|
2796
|
+
export { WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
2748
2797
|
|
|
2749
2798
|
//# sourceMappingURL=HtmlDiff.mjs.map
|