@createiq/htmldiff 1.2.0-beta.5 → 1.2.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HtmlDiff.cjs CHANGED
@@ -1,3 +1,7 @@
1
+ Object.defineProperties(exports, {
2
+ __esModule: { value: true },
3
+ [Symbol.toStringTag]: { value: "Module" }
4
+ });
1
5
  //#region src/Match.ts
2
6
  var Match = class {
3
7
  _startInOld;
@@ -2035,6 +2039,37 @@ var BlockFinder = class {
2035
2039
  };
2036
2040
  //#endregion
2037
2041
  //#region src/HtmlDiff.ts
2042
+ /**
2043
+ * Opinionated options that align htmldiff's output with Microsoft Word's
2044
+ * track-changes rendering for legal-document rewrites.
2045
+ *
2046
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
2047
+ * LCS match, however small — which fragments long sentence rewrites
2048
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
2049
+ * "shall"). Word collapses those into a single coarse del+ins, which is
2050
+ * dramatically more readable for legal text.
2051
+ *
2052
+ * 0.25 was tuned empirically against a customer Word reference (US
2053
+ * Commercial One CP, May 2026):
2054
+ * - short edits (typo / one-word insert): output identical to
2055
+ * threshold=0 — inter-match distances are tiny so every match
2056
+ * trivially clears the bar;
2057
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
2058
+ * reference): previously produced 6 dels + 5 ins fragmented around
2059
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
2060
+ * Word's 1+1 and a major readability win;
2061
+ * - higher values (0.3+) collapsed short edits containing inline
2062
+ * formatting changes into a single block — too aggressive.
2063
+ *
2064
+ * Consumers rendering legal documents should spread this into their
2065
+ * options:
2066
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
2067
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
2068
+ *
2069
+ * Other consumers (machine-readable diff, exact-token alignment) can
2070
+ * keep the bare default.
2071
+ */
2072
+ const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
2038
2073
  var HtmlDiff = class HtmlDiff {
2039
2074
  /**
2040
2075
  * This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
@@ -2187,8 +2222,23 @@ var HtmlDiff = class HtmlDiff {
2187
2222
  this.oldText = oldText;
2188
2223
  this.newText = newText;
2189
2224
  }
2190
- static execute(oldText, newText) {
2191
- return new HtmlDiff(oldText, newText).build();
2225
+ /**
2226
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
2227
+ * `executeThreeWay`, with two intentional exceptions documented
2228
+ * inline below. Consumers wanting Word-aligned output should spread
2229
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
2230
+ *
2231
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
2232
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
2233
+ * the recursive cell diff can happen. Callers can't override that.
2234
+ */
2235
+ static execute(oldText, newText, options = {}) {
2236
+ const inner = new HtmlDiff(oldText, newText);
2237
+ if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
2238
+ if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
2239
+ if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
2240
+ if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
2241
+ return inner.build();
2192
2242
  }
2193
2243
  /**
2194
2244
  * Analyse a two-way diff and return its raw building blocks: the word
@@ -2311,9 +2361,17 @@ var HtmlDiff = class HtmlDiff {
2311
2361
  * `insertTag` and push raw, so the stack entry for the open is
2312
2362
  * never popped. Rather than throw — which forces the caller's UI
2313
2363
  * into an error boundary — close every leftover wrap with `</ins>`
2314
- * at the end of emission. The resulting HTML has an extra
2315
- * `</ins>` next to the formatting closer; DOMParser-normalisation
2316
- * downstream produces sensible nesting.
2364
+ * at the end of emission.
2365
+ *
2366
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
2367
+ * opener pushed (every formatting opener emits an inner `<ins…>`
2368
+ * postInject regardless of whether the outer segment is ins or
2369
+ * del). For del-segment formatting openers the outer `<del>` may
2370
+ * itself be left open by the same emission imbalance; this fixup
2371
+ * doesn't address that. Downstream browsers/DOMParser normalise
2372
+ * mildly-malformed HTML by closing dangling tags, so the rendered
2373
+ * output is usually acceptable — but the warning IS the signal
2374
+ * that the input had a real imbalance worth investigating.
2317
2375
  */
2318
2376
  static emitSegments(segments) {
2319
2377
  const emitter = new HtmlDiff("", "");
@@ -2739,6 +2797,7 @@ var HtmlDiff = class HtmlDiff {
2739
2797
  }
2740
2798
  };
2741
2799
  //#endregion
2742
- module.exports = HtmlDiff;
2800
+ exports.WORD_ALIGNED_OPTIONS = WORD_ALIGNED_OPTIONS;
2801
+ exports.default = HtmlDiff;
2743
2802
 
2744
2803
  //# sourceMappingURL=HtmlDiff.cjs.map