@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,6 +63,37 @@ interface AnalyzeResult {
63
63
  * `evaluateProjectionApplicability` results.
64
64
  */
65
65
  type ThreeWayOptions = AnalyzeOptions;
66
+ /**
67
+ * Opinionated options that align htmldiff's output with Microsoft Word's
68
+ * track-changes rendering for legal-document rewrites.
69
+ *
70
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
71
+ * LCS match, however small — which fragments long sentence rewrites
72
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
73
+ * "shall"). Word collapses those into a single coarse del+ins, which is
74
+ * dramatically more readable for legal text.
75
+ *
76
+ * 0.25 was tuned empirically against a customer Word reference (US
77
+ * Commercial One CP, May 2026):
78
+ * - short edits (typo / one-word insert): output identical to
79
+ * threshold=0 — inter-match distances are tiny so every match
80
+ * trivially clears the bar;
81
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
82
+ * reference): previously produced 6 dels + 5 ins fragmented around
83
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
84
+ * Word's 1+1 and a major readability win;
85
+ * - higher values (0.3+) collapsed short edits containing inline
86
+ * formatting changes into a single block — too aggressive.
87
+ *
88
+ * Consumers rendering legal documents should spread this into their
89
+ * options:
90
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
91
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
92
+ *
93
+ * Other consumers (machine-readable diff, exact-token alignment) can
94
+ * keep the bare default.
95
+ */
96
+ declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
66
97
  declare class HtmlDiff {
67
98
  /**
68
99
  * This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
@@ -99,6 +130,16 @@ declare class HtmlDiff {
99
130
  private newText;
100
131
  private oldText;
101
132
  private tablePreprocessDepth;
133
+ /**
134
+ * Tracks currently-open formatting-tag wraps. Each entry pairs the
135
+ * opening tag (so a later closing tag can find its match) with the
136
+ * styling info needed to RE-OPEN the wrap if an overlapping
137
+ * formatting-tag close forces it to split. Without the styling info,
138
+ * an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
139
+ * unclosable wrap (the closing tag for the outer wrap arrives while
140
+ * an inner wrap is still on the stack); see `insertTag`'s closing
141
+ * handler for the split logic.
142
+ */
102
143
  private specialTagDiffStack;
103
144
  private newWords;
104
145
  private oldWords;
@@ -163,7 +204,17 @@ declare class HtmlDiff {
163
204
  * @param newText The new text.
164
205
  */
165
206
  constructor(oldText: string, newText: string);
166
- static execute(oldText: string, newText: string): string;
207
+ /**
208
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
209
+ * `executeThreeWay`, with two intentional exceptions documented
210
+ * inline below. Consumers wanting Word-aligned output should spread
211
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
212
+ *
213
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
214
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
215
+ * the recursive cell diff can happen. Callers can't override that.
216
+ */
217
+ static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
167
218
  /**
168
219
  * Analyse a two-way diff and return its raw building blocks: the word
169
220
  * arrays the diff ran against, the operations produced, the original
@@ -192,22 +243,6 @@ declare class HtmlDiff {
192
243
  * why symmetry matters.
193
244
  */
194
245
  static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
195
- /**
196
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
197
- * version CP sent back), and V3 (Me's current draft), produces a
198
- * single attributed HTML output where CP's and Me's changes are
199
- * distinguished by `data-author` ('cp' or 'me') and matching
200
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
201
- * CP's proposal" case (Me deleted text CP had inserted) gets a
202
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
203
- *
204
- * Coordinates the symmetric-projection decision (D1) across both
205
- * internal `analyze` calls so V2 tokenises identically on each side
206
- * of the spine. When `useProjections` is left undefined, the decision
207
- * is the conjunction of both pair-wise heuristics — project iff both
208
- * pairs would project on their own. Pass an explicit boolean to
209
- * override.
210
- */
211
246
  /**
212
247
  * Three-way HTML diff against a shared genesis. Produces attributed
213
248
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
@@ -235,6 +270,25 @@ declare class HtmlDiff {
235
270
  * buffer. Reusing the instance keeps the formatting-tag stack
236
271
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
237
272
  * opened in one segment and closed in another stays balanced.
273
+ *
274
+ * Edge case: an ins/del segment can open a formatting wrap whose
275
+ * matching closer ends up in an equal segment (`<strong>` deleted
276
+ * by CP but `</strong>` kept by both — buildSegments emits the open
277
+ * as del-cp and the close as equal). Equal segments bypass
278
+ * `insertTag` and push raw, so the stack entry for the open is
279
+ * never popped. Rather than throw — which forces the caller's UI
280
+ * into an error boundary — close every leftover wrap with `</ins>`
281
+ * at the end of emission.
282
+ *
283
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
284
+ * opener pushed (every formatting opener emits an inner `<ins…>`
285
+ * postInject regardless of whether the outer segment is ins or
286
+ * del). For del-segment formatting openers the outer `<del>` may
287
+ * itself be left open by the same emission imbalance; this fixup
288
+ * doesn't address that. Downstream browsers/DOMParser normalise
289
+ * mildly-malformed HTML by closing dangling tags, so the rendered
290
+ * output is usually acceptable — but the warning IS the signal
291
+ * that the input had a real imbalance worth investigating.
238
292
  */
239
293
  private static emitSegments;
240
294
  /**
@@ -338,5 +392,5 @@ declare class HtmlDiff {
338
392
  private findMatch;
339
393
  }
340
394
  //#endregion
341
- export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
395
+ export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
342
396
  //# sourceMappingURL=HtmlDiff.d.cts.map
@@ -63,6 +63,37 @@ interface AnalyzeResult {
63
63
  * `evaluateProjectionApplicability` results.
64
64
  */
65
65
  type ThreeWayOptions = AnalyzeOptions;
66
+ /**
67
+ * Opinionated options that align htmldiff's output with Microsoft Word's
68
+ * track-changes rendering for legal-document rewrites.
69
+ *
70
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
71
+ * LCS match, however small — which fragments long sentence rewrites
72
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
73
+ * "shall"). Word collapses those into a single coarse del+ins, which is
74
+ * dramatically more readable for legal text.
75
+ *
76
+ * 0.25 was tuned empirically against a customer Word reference (US
77
+ * Commercial One CP, May 2026):
78
+ * - short edits (typo / one-word insert): output identical to
79
+ * threshold=0 — inter-match distances are tiny so every match
80
+ * trivially clears the bar;
81
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
82
+ * reference): previously produced 6 dels + 5 ins fragmented around
83
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
84
+ * Word's 1+1 and a major readability win;
85
+ * - higher values (0.3+) collapsed short edits containing inline
86
+ * formatting changes into a single block — too aggressive.
87
+ *
88
+ * Consumers rendering legal documents should spread this into their
89
+ * options:
90
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
91
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
92
+ *
93
+ * Other consumers (machine-readable diff, exact-token alignment) can
94
+ * keep the bare default.
95
+ */
96
+ declare const WORD_ALIGNED_OPTIONS: AnalyzeOptions;
66
97
  declare class HtmlDiff {
67
98
  /**
68
99
  * This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
@@ -99,6 +130,16 @@ declare class HtmlDiff {
99
130
  private newText;
100
131
  private oldText;
101
132
  private tablePreprocessDepth;
133
+ /**
134
+ * Tracks currently-open formatting-tag wraps. Each entry pairs the
135
+ * opening tag (so a later closing tag can find its match) with the
136
+ * styling info needed to RE-OPEN the wrap if an overlapping
137
+ * formatting-tag close forces it to split. Without the styling info,
138
+ * an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
139
+ * unclosable wrap (the closing tag for the outer wrap arrives while
140
+ * an inner wrap is still on the stack); see `insertTag`'s closing
141
+ * handler for the split logic.
142
+ */
102
143
  private specialTagDiffStack;
103
144
  private newWords;
104
145
  private oldWords;
@@ -163,7 +204,17 @@ declare class HtmlDiff {
163
204
  * @param newText The new text.
164
205
  */
165
206
  constructor(oldText: string, newText: string);
166
- static execute(oldText: string, newText: string): string;
207
+ /**
208
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
209
+ * `executeThreeWay`, with two intentional exceptions documented
210
+ * inline below. Consumers wanting Word-aligned output should spread
211
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
212
+ *
213
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
214
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
215
+ * the recursive cell diff can happen. Callers can't override that.
216
+ */
217
+ static execute(oldText: string, newText: string, options?: AnalyzeOptions): string;
167
218
  /**
168
219
  * Analyse a two-way diff and return its raw building blocks: the word
169
220
  * arrays the diff ran against, the operations produced, the original
@@ -192,22 +243,6 @@ declare class HtmlDiff {
192
243
  * why symmetry matters.
193
244
  */
194
245
  static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
195
- /**
196
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
197
- * version CP sent back), and V3 (Me's current draft), produces a
198
- * single attributed HTML output where CP's and Me's changes are
199
- * distinguished by `data-author` ('cp' or 'me') and matching
200
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
201
- * CP's proposal" case (Me deleted text CP had inserted) gets a
202
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
203
- *
204
- * Coordinates the symmetric-projection decision (D1) across both
205
- * internal `analyze` calls so V2 tokenises identically on each side
206
- * of the spine. When `useProjections` is left undefined, the decision
207
- * is the conjunction of both pair-wise heuristics — project iff both
208
- * pairs would project on their own. Pass an explicit boolean to
209
- * override.
210
- */
211
246
  /**
212
247
  * Three-way HTML diff against a shared genesis. Produces attributed
213
248
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
@@ -235,6 +270,25 @@ declare class HtmlDiff {
235
270
  * buffer. Reusing the instance keeps the formatting-tag stack
236
271
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
237
272
  * opened in one segment and closed in another stays balanced.
273
+ *
274
+ * Edge case: an ins/del segment can open a formatting wrap whose
275
+ * matching closer ends up in an equal segment (`<strong>` deleted
276
+ * by CP but `</strong>` kept by both — buildSegments emits the open
277
+ * as del-cp and the close as equal). Equal segments bypass
278
+ * `insertTag` and push raw, so the stack entry for the open is
279
+ * never popped. Rather than throw — which forces the caller's UI
280
+ * into an error boundary — close every leftover wrap with `</ins>`
281
+ * at the end of emission.
282
+ *
283
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
284
+ * opener pushed (every formatting opener emits an inner `<ins…>`
285
+ * postInject regardless of whether the outer segment is ins or
286
+ * del). For del-segment formatting openers the outer `<del>` may
287
+ * itself be left open by the same emission imbalance; this fixup
288
+ * doesn't address that. Downstream browsers/DOMParser normalise
289
+ * mildly-malformed HTML by closing dangling tags, so the rendered
290
+ * output is usually acceptable — but the warning IS the signal
291
+ * that the input had a real imbalance worth investigating.
238
292
  */
239
293
  private static emitSegments;
240
294
  /**
@@ -338,5 +392,5 @@ declare class HtmlDiff {
338
392
  private findMatch;
339
393
  }
340
394
  //#endregion
341
- export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, HtmlDiff as default };
395
+ export { AnalyzeOptions, AnalyzeResult, ThreeWayOptions, WORD_ALIGNED_OPTIONS, HtmlDiff as default };
342
396
  //# sourceMappingURL=HtmlDiff.d.mts.map