@createiq/htmldiff 1.2.0-beta.5 → 1.2.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,6 +63,37 @@ interface AnalyzeResult {
63
63
  * `evaluateProjectionApplicability` results.
64
64
  */
65
65
  type ThreeWayOptions = AnalyzeOptions;
66
+ /**
67
+ * Opinionated options that align htmldiff's output with Microsoft Word's
68
+ * track-changes rendering for legal-document rewrites.
69
+ *
70
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
71
+ * LCS match, however small — which fragments long sentence rewrites
72
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
73
+ * "shall"). Word collapses those into a single coarse del+ins, which is
74
+ * dramatically more readable for legal text.
75
+ *
76
+ * 0.25 was tuned empirically against a customer Word reference (US
77
+ * Commercial One CP, May 2026):
78
+ * - short edits (typo / one-word insert): output identical to
79
+ * threshold=0 — inter-match distances are tiny so every match
80
+ * trivially clears the bar;
81
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
82
+ * reference): previously produced 6 dels + 5 ins fragmented around
83
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
84
+ * Word's 1+1 and a major readability win;
85
+ * - higher values (0.3+) collapsed short edits containing inline
86
+ * formatting changes into a single block — too aggressive.
87
+ *
88
+ * Consumers rendering legal documents should spread this into their
89
+ * options:
90
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
91
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
92
+ *
93
+ * Other consumers (machine-readable diff, exact-token alignment) can
94
+ * keep the bare default.
95
+ */
96
+ declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
66
97
  declare class HtmlDiff {
67
98
  /**
68
99
  * This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
@@ -173,7 +204,17 @@ declare class HtmlDiff {
173
204
  * @param newText The new text.
174
205
  */
175
206
  constructor(oldText: string, newText: string);
176
- static execute(oldText: string, newText: string): string;
207
+ /**
208
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
209
+ * `executeThreeWay`, with two intentional exceptions documented
210
+ * inline below. Consumers wanting Word-aligned output should spread
211
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
212
+ *
213
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
214
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
215
+ * the recursive cell diff can happen. Callers can't override that.
216
+ */
217
+ static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
177
218
  /**
178
219
  * Analyse a two-way diff and return its raw building blocks: the word
179
220
  * arrays the diff ran against, the operations produced, the original
@@ -237,9 +278,17 @@ declare class HtmlDiff {
237
278
  * `insertTag` and push raw, so the stack entry for the open is
238
279
  * never popped. Rather than throw — which forces the caller's UI
239
280
  * into an error boundary — close every leftover wrap with `</ins>`
240
- * at the end of emission. The resulting HTML has an extra
241
- * `</ins>` next to the formatting closer; DOMParser-normalisation
242
- * downstream produces sensible nesting.
281
+ * at the end of emission.
282
+ *
283
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
284
+ * opener pushed (every formatting opener emits an inner `<ins…>`
285
+ * postInject regardless of whether the outer segment is ins or
286
+ * del). For del-segment formatting openers the outer `<del>` may
287
+ * itself be left open by the same emission imbalance; this fixup
288
+ * doesn't address that. Downstream browsers/DOMParser normalise
289
+ * mildly-malformed HTML by closing dangling tags, so the rendered
290
+ * output is usually acceptable — but the warning IS the signal
291
+ * that the input had a real imbalance worth investigating.
243
292
  */
244
293
  private static emitSegments;
245
294
  /**
@@ -343,5 +392,5 @@ declare class HtmlDiff {
343
392
  private findMatch;
344
393
  }
345
394
  //#endregion
346
- export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
395
+ export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
347
396
  //# sourceMappingURL=HtmlDiff.d.cts.map
@@ -63,6 +63,37 @@ interface AnalyzeResult {
63
63
  * `evaluateProjectionApplicability` results.
64
64
  */
65
65
  type ThreeWayOptions = AnalyzeOptions;
66
+ /**
67
+ * Opinionated options that align htmldiff's output with Microsoft Word's
68
+ * track-changes rendering for legal-document rewrites.
69
+ *
70
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
71
+ * LCS match, however small — which fragments long sentence rewrites
72
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
73
+ * "shall"). Word collapses those into a single coarse del+ins, which is
74
+ * dramatically more readable for legal text.
75
+ *
76
+ * 0.25 was tuned empirically against a customer Word reference (US
77
+ * Commercial One CP, May 2026):
78
+ * - short edits (typo / one-word insert): output identical to
79
+ * threshold=0 — inter-match distances are tiny so every match
80
+ * trivially clears the bar;
81
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
82
+ * reference): previously produced 6 dels + 5 ins fragmented around
83
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
84
+ * Word's 1+1 and a major readability win;
85
+ * - higher values (0.3+) collapsed short edits containing inline
86
+ * formatting changes into a single block — too aggressive.
87
+ *
88
+ * Consumers rendering legal documents should spread this into their
89
+ * options:
90
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
91
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
92
+ *
93
+ * Other consumers (machine-readable diff, exact-token alignment) can
94
+ * keep the bare default.
95
+ */
96
+ declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
66
97
  declare class HtmlDiff {
67
98
  /**
68
99
  * This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
@@ -173,7 +204,17 @@ declare class HtmlDiff {
173
204
  * @param newText The new text.
174
205
  */
175
206
  constructor(oldText: string, newText: string);
176
- static execute(oldText: string, newText: string): string;
207
+ /**
208
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
209
+ * `executeThreeWay`, with two intentional exceptions documented
210
+ * inline below. Consumers wanting Word-aligned output should spread
211
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
212
+ *
213
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
214
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
215
+ * the recursive cell diff can happen. Callers can't override that.
216
+ */
217
+ static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
177
218
  /**
178
219
  * Analyse a two-way diff and return its raw building blocks: the word
179
220
  * arrays the diff ran against, the operations produced, the original
@@ -237,9 +278,17 @@ declare class HtmlDiff {
237
278
  * `insertTag` and push raw, so the stack entry for the open is
238
279
  * never popped. Rather than throw — which forces the caller's UI
239
280
  * into an error boundary — close every leftover wrap with `</ins>`
240
- * at the end of emission. The resulting HTML has an extra
241
- * `</ins>` next to the formatting closer; DOMParser-normalisation
242
- * downstream produces sensible nesting.
281
+ * at the end of emission.
282
+ *
283
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
284
+ * opener pushed (every formatting opener emits an inner `<ins…>`
285
+ * postInject regardless of whether the outer segment is ins or
286
+ * del). For del-segment formatting openers the outer `<del>` may
287
+ * itself be left open by the same emission imbalance; this fixup
288
+ * doesn't address that. Downstream browsers/DOMParser normalise
289
+ * mildly-malformed HTML by closing dangling tags, so the rendered
290
+ * output is usually acceptable — but the warning IS the signal
291
+ * that the input had a real imbalance worth investigating.
243
292
  */
244
293
  private static emitSegments;
245
294
  /**
@@ -343,5 +392,5 @@ declare class HtmlDiff {
343
392
  private findMatch;
344
393
  }
345
394
  //#endregion
346
- export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
395
+ export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
347
396
  //# sourceMappingURL=HtmlDiff.d.mts.map
package/dist/HtmlDiff.mjs CHANGED
@@ -2035,6 +2035,37 @@ var BlockFinder = class {
2035
2035
  };
2036
2036
  //#endregion
2037
2037
  //#region src/HtmlDiff.ts
2038
+ /**
2039
+ * Opinionated options that align htmldiff's output with Microsoft Word's
2040
+ * track-changes rendering for legal-document rewrites.
2041
+ *
2042
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
2043
+ * LCS match, however small — which fragments long sentence rewrites
2044
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
2045
+ * "shall"). Word collapses those into a single coarse del+ins, which is
2046
+ * dramatically more readable for legal text.
2047
+ *
2048
+ * 0.25 was tuned empirically against a customer Word reference (US
2049
+ * Commercial One CP, May 2026):
2050
+ * - short edits (typo / one-word insert): output identical to
2051
+ * threshold=0 — inter-match distances are tiny so every match
2052
+ * trivially clears the bar;
2053
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
2054
+ * reference): previously produced 6 dels + 5 ins fragmented around
2055
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
2056
+ * Word's 1+1 and a major readability win;
2057
+ * - higher values (0.3+) collapsed short edits containing inline
2058
+ * formatting changes into a single block — too aggressive.
2059
+ *
2060
+ * Consumers rendering legal documents should spread this into their
2061
+ * options:
2062
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
2063
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
2064
+ *
2065
+ * Other consumers (machine-readable diff, exact-token alignment) can
2066
+ * keep the bare default.
2067
+ */
2068
+ const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
2038
2069
  var HtmlDiff = class HtmlDiff {
2039
2070
  /**
2040
2071
  * This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
@@ -2187,8 +2218,23 @@ var HtmlDiff = class HtmlDiff {
2187
2218
  this.oldText = oldText;
2188
2219
  this.newText = newText;
2189
2220
  }
2190
- static execute(oldText, newText) {
2191
- return new HtmlDiff(oldText, newText).build();
2221
+ /**
2222
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
2223
+ * `executeThreeWay`, with two intentional exceptions documented
2224
+ * inline below. Consumers wanting Word-aligned output should spread
2225
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
2226
+ *
2227
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
2228
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
2229
+ * the recursive cell diff can happen. Callers can't override that.
2230
+ */
2231
+ static execute(oldText, newText, options = {}) {
2232
+ const inner = new HtmlDiff(oldText, newText);
2233
+ if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
2234
+ if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
2235
+ if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
2236
+ if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
2237
+ return inner.build();
2192
2238
  }
2193
2239
  /**
2194
2240
  * Analyse a two-way diff and return its raw building blocks: the word
@@ -2311,9 +2357,17 @@ var HtmlDiff = class HtmlDiff {
2311
2357
  * `insertTag` and push raw, so the stack entry for the open is
2312
2358
  * never popped. Rather than throw — which forces the caller's UI
2313
2359
  * into an error boundary — close every leftover wrap with `</ins>`
2314
- * at the end of emission. The resulting HTML has an extra
2315
- * `</ins>` next to the formatting closer; DOMParser-normalisation
2316
- * downstream produces sensible nesting.
2360
+ * at the end of emission.
2361
+ *
2362
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
2363
+ * opener pushed (every formatting opener emits an inner `<ins…>`
2364
+ * postInject regardless of whether the outer segment is ins or
2365
+ * del). For del-segment formatting openers the outer `<del>` may
2366
+ * itself be left open by the same emission imbalance; this fixup
2367
+ * doesn't address that. Downstream browsers/DOMParser normalise
2368
+ * mildly-malformed HTML by closing dangling tags, so the rendered
2369
+ * output is usually acceptable — but the warning IS the signal
2370
+ * that the input had a real imbalance worth investigating.
2317
2371
  */
2318
2372
  static emitSegments(segments) {
2319
2373
  const emitter = new HtmlDiff("", "");
@@ -2739,6 +2793,6 @@ var HtmlDiff = class HtmlDiff {
2739
2793
  }
2740
2794
  };
2741
2795
  //#endregion
2742
- export { HtmlDiff as default };
2796
+ export { WORD_ALIGNED_OPTIONS, HtmlDiff as default };
2743
2797
 
2744
2798
  //# sourceMappingURL=HtmlDiff.mjs.map