@createiq/htmldiff 1.2.0-beta.4 → 1.2.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +64 -6
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +1 -1
- package/dist/HtmlDiff.d.mts +1 -1
- package/dist/HtmlDiff.mjs +64 -6
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +14 -2
- package/src/ThreeWayTable.ts +91 -5
- package/test/HtmlDiff.threeWay.tables.spec.ts +28 -0
package/package.json
CHANGED
package/src/HtmlDiff.ts
CHANGED
|
@@ -249,8 +249,20 @@ export default class HtmlDiff {
|
|
|
249
249
|
this.newText = newText
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
-
static execute(oldText: string, newText: string): string {
|
|
253
|
-
|
|
252
|
+
static execute(oldText: string, newText: string, options: AnalyzeOptions = {}): string {
|
|
253
|
+
const inner = new HtmlDiff(oldText, newText)
|
|
254
|
+
if (options.blockExpressions) {
|
|
255
|
+
for (const expr of options.blockExpressions) inner.addBlockExpression(expr)
|
|
256
|
+
}
|
|
257
|
+
if (options.repeatingWordsAccuracy !== undefined) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy
|
|
258
|
+
if (options.orphanMatchThreshold !== undefined) inner.orphanMatchThreshold = options.orphanMatchThreshold
|
|
259
|
+
if (options.ignoreWhitespaceDifferences !== undefined) {
|
|
260
|
+
inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences
|
|
261
|
+
}
|
|
262
|
+
// `useProjections` is intentionally NOT plumbed here — the 2-way
|
|
263
|
+
// path's build() runs its own heuristic. Pass via `analyze` if a
|
|
264
|
+
// caller needs to force it.
|
|
265
|
+
return inner.build()
|
|
254
266
|
}
|
|
255
267
|
|
|
256
268
|
/**
|
package/src/ThreeWayTable.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { lcsAlign, textSimilarity } from './Alignment'
|
|
1
|
+
import { type Alignment, lcsAlign, pairSimilarUnmatched, textSimilarity } from './Alignment'
|
|
2
2
|
import { injectClass, parseOpeningTagAt } from './HtmlScanner'
|
|
3
3
|
import {
|
|
4
4
|
type CellRange,
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
PLACEHOLDER_SUFFIX,
|
|
9
9
|
type RowRange,
|
|
10
10
|
rowKey,
|
|
11
|
+
rowText,
|
|
11
12
|
sameDimensions,
|
|
12
13
|
spliceString,
|
|
13
14
|
type TableRange,
|
|
@@ -143,8 +144,14 @@ function preprocessByContent(
|
|
|
143
144
|
const cKeys = cTables.map(t => tableKey(cpLatest, t))
|
|
144
145
|
const mKeys = mTables.map(t => tableKey(meCurrent, t))
|
|
145
146
|
|
|
146
|
-
|
|
147
|
-
|
|
147
|
+
// Exact tableKey LCS, then fuzzy-pair unmatched runs by content
|
|
148
|
+
// similarity. Without this, a table whose cells were edited (but
|
|
149
|
+
// not its overall shape) fails the exact tableKey match and the
|
|
150
|
+
// table-level aligner pulls it apart into a whole-table del + a
|
|
151
|
+
// whole-table ins. Same fuzzy pass `TableDiff` uses for the 2-way
|
|
152
|
+
// path — `pairSimilarTablesThreeWay` is defined below.
|
|
153
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables)
|
|
154
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables)
|
|
148
155
|
|
|
149
156
|
// Maps: genesisIdx → matching cpIdx (-1 if none); cpIdx → matching genesisIdx; etc.
|
|
150
157
|
const gToCp = new Array<number>(gTables.length).fill(-1)
|
|
@@ -342,6 +349,76 @@ function tableKey(html: string, table: TableRange): string {
|
|
|
342
349
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, ' ').trim()
|
|
343
350
|
}
|
|
344
351
|
|
|
352
|
+
/**
|
|
353
|
+
* Character-level similarity above which the three-way aligner treats
|
|
354
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
355
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
356
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
357
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
358
|
+
* is enough for the 2-way diff to pair the rows, it should also be
|
|
359
|
+
* enough for 3-way.
|
|
360
|
+
*/
|
|
361
|
+
const THREE_WAY_FUZZY_THRESHOLD = 0.5
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
365
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
366
|
+
* spine (either cp or me). The genesis table (`oldTable`) is always the
|
|
367
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
368
|
+
* the enriched alignment with additional paired entries.
|
|
369
|
+
*
|
|
370
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
371
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
372
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
373
|
+
* its row with genesis (content overlap above threshold) while the
|
|
374
|
+
* other side falls below threshold. That mismatch routes through
|
|
375
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
376
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
377
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
378
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
379
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
380
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
381
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
382
|
+
* back to the boundary-insertion path that emits both sides
|
|
383
|
+
* explicitly.
|
|
384
|
+
*/
|
|
385
|
+
function pairSimilarRowsThreeWay(
|
|
386
|
+
alignment: Alignment[],
|
|
387
|
+
genesis: string,
|
|
388
|
+
newHtml: string,
|
|
389
|
+
oldTable: TableRange,
|
|
390
|
+
newTable: TableRange
|
|
391
|
+
): Alignment[] {
|
|
392
|
+
const oldTexts = oldTable.rows.map(r => rowText(genesis, r))
|
|
393
|
+
const newTexts = newTable.rows.map(r => rowText(newHtml, r))
|
|
394
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
395
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0
|
|
396
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
397
|
+
})
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
/**
|
|
401
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
402
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
403
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
404
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
405
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
406
|
+
* instead of recursing into per-cell three-way diffs.
|
|
407
|
+
*/
|
|
408
|
+
function pairSimilarTablesThreeWay(
|
|
409
|
+
alignment: Alignment[],
|
|
410
|
+
oldHtml: string,
|
|
411
|
+
newHtml: string,
|
|
412
|
+
oldTables: TableRange[],
|
|
413
|
+
newTables: TableRange[]
|
|
414
|
+
): Alignment[] {
|
|
415
|
+
const oldTexts = oldTables.map(t => t.rows.map(r => rowText(oldHtml, r)).join(' '))
|
|
416
|
+
const newTexts = newTables.map(t => t.rows.map(r => rowText(newHtml, r)).join(' '))
|
|
417
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
|
|
418
|
+
textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
419
|
+
)
|
|
420
|
+
}
|
|
421
|
+
|
|
345
422
|
// ────────────────────────────────────────────────────────────────────────────
|
|
346
423
|
// Per-table diff: positional cells or row-level structural change.
|
|
347
424
|
|
|
@@ -426,8 +503,17 @@ function diffTableStructural(
|
|
|
426
503
|
const cKeys = tC.rows.map(r => rowKey(cpLatest, r))
|
|
427
504
|
const mKeys = tM.rows.map(r => rowKey(meCurrent, r))
|
|
428
505
|
|
|
429
|
-
|
|
430
|
-
|
|
506
|
+
// Exact LCS first, then fuzzy-pair remaining unmatched runs. Without
|
|
507
|
+
// the fuzzy pass, a row where CP edited just a single cell's text
|
|
508
|
+
// produces no key match — the row aligner emits the genesis row as
|
|
509
|
+
// CP-deleted AND CP's edited row as inserted, when a cell-level
|
|
510
|
+
// diff against the paired row would render the edit far more
|
|
511
|
+
// legibly. The 2-way path (`TableDiff.pairSimilarUnmatchedRows`)
|
|
512
|
+
// has done this since inception; bringing the three-way path in
|
|
513
|
+
// step removes the asymmetry where the cp-only / all-changes view
|
|
514
|
+
// looks markedly worse than plain 2-way for ordinary cell edits.
|
|
515
|
+
const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC)
|
|
516
|
+
const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM)
|
|
431
517
|
|
|
432
518
|
// genesisIdx → matching cpIdx (-1 if cp deleted this row)
|
|
433
519
|
const gToCp = new Array<number>(tG.rows.length).fill(-1)
|
|
@@ -288,6 +288,34 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
288
288
|
)
|
|
289
289
|
})
|
|
290
290
|
|
|
291
|
+
it('CP edited one cell in a row (same shape) — fuzzy-pairs and emits a cell-level diff, not whole-row del+ins', () => {
|
|
292
|
+
// Regression: the 3-way row aligner only did exact lcsAlign over
|
|
293
|
+
// rowKey, so a row where CP edited a single cell's text produced
|
|
294
|
+
// no key match and the algorithm split the row into a whole-row
|
|
295
|
+
// delete + whole-row insert. The 2-way path has always run a
|
|
296
|
+
// fuzzy-pairing pass after lcsAlign; bringing the 3-way path in
|
|
297
|
+
// step removes the asymmetry where cp-only / all-changes views
|
|
298
|
+
// looked materially worse than 2-way for ordinary cell edits.
|
|
299
|
+
//
|
|
300
|
+
// Same-shape genesis/cp/me; CP edited the middle cell's text.
|
|
301
|
+
// Me === genesis. Expect a paired row with cell-level cp-ins
|
|
302
|
+
// markup, NOT two distinct whole-row entries.
|
|
303
|
+
const out = HtmlDiff.executeThreeWay(
|
|
304
|
+
'<table><tr><td>Party A</td><td>old details</td><td>kept</td></tr></table>',
|
|
305
|
+
'<table><tr><td>Party A</td><td>new details</td><td>kept</td></tr></table>',
|
|
306
|
+
'<table><tr><td>Party A</td><td>old details</td><td>kept</td></tr></table>'
|
|
307
|
+
)
|
|
308
|
+
// CP's edit lives inside the row, not as a parallel whole-row
|
|
309
|
+
// delete-then-insert. Whole-row markers would carry `class='diffdel ...'`
|
|
310
|
+
// or `class='diffins ...'` on the `<tr>` itself.
|
|
311
|
+
expect(out).not.toMatch(/<tr [^>]*class=['"]diffdel/)
|
|
312
|
+
expect(out).not.toMatch(/<tr [^>]*class=['"]diffins/)
|
|
313
|
+
expect(out).toContain('Party A')
|
|
314
|
+
expect(out).toContain("data-author='cp'")
|
|
315
|
+
// Me === genesis so any me attribution would indicate a swap.
|
|
316
|
+
expect(out).not.toContain("data-author='me'")
|
|
317
|
+
})
|
|
318
|
+
|
|
291
319
|
it('cell-count mismatch: both sides restructured differently — both ins rows attributed', () => {
|
|
292
320
|
// Genesis 2 cells, CP 3 cells, Me 4 cells. Neither side keeps
|
|
293
321
|
// the genesis shape, so both restructures must be visible.
|