@usejunior/docx-core 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +86 -28
- package/dist/.tsbuildinfo +1 -0
- package/dist/atomizer.d.ts +218 -0
- package/dist/atomizer.d.ts.map +1 -0
- package/dist/atomizer.js +856 -0
- package/dist/atomizer.js.map +1 -0
- package/dist/baselines/atomizer/atomLcs.d.ts +96 -0
- package/dist/baselines/atomizer/atomLcs.d.ts.map +1 -0
- package/dist/baselines/atomizer/atomLcs.js +347 -0
- package/dist/baselines/atomizer/atomLcs.js.map +1 -0
- package/dist/baselines/atomizer/debug.d.ts +41 -0
- package/dist/baselines/atomizer/debug.d.ts.map +1 -0
- package/dist/baselines/atomizer/debug.js +85 -0
- package/dist/baselines/atomizer/debug.js.map +1 -0
- package/dist/baselines/atomizer/documentReconstructor.d.ts +64 -0
- package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -0
- package/dist/baselines/atomizer/documentReconstructor.js +939 -0
- package/dist/baselines/atomizer/documentReconstructor.js.map +1 -0
- package/dist/baselines/atomizer/hierarchicalLcs.d.ts +111 -0
- package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -0
- package/dist/baselines/atomizer/hierarchicalLcs.js +469 -0
- package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier.d.ts +183 -0
- package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier.js +1600 -0
- package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -0
- package/dist/baselines/atomizer/numberingIntegration.d.ts +59 -0
- package/dist/baselines/atomizer/numberingIntegration.d.ts.map +1 -0
- package/dist/baselines/atomizer/numberingIntegration.js +209 -0
- package/dist/baselines/atomizer/numberingIntegration.js.map +1 -0
- package/dist/baselines/atomizer/pipeline.d.ts +65 -0
- package/dist/baselines/atomizer/pipeline.d.ts.map +1 -0
- package/dist/baselines/atomizer/pipeline.js +510 -0
- package/dist/baselines/atomizer/pipeline.js.map +1 -0
- package/dist/baselines/atomizer/premergeRuns.d.ts +26 -0
- package/dist/baselines/atomizer/premergeRuns.d.ts.map +1 -0
- package/dist/baselines/atomizer/premergeRuns.js +150 -0
- package/dist/baselines/atomizer/premergeRuns.js.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.d.ts +63 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.d.ts.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.js +254 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.js.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts +64 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js +586 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -0
- package/dist/baselines/atomizer/xmlToWmlElement.d.ts +65 -0
- package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -0
- package/dist/baselines/atomizer/xmlToWmlElement.js +95 -0
- package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -0
- package/dist/baselines/diffmatch/documentBuilder.d.ts +44 -0
- package/dist/baselines/diffmatch/documentBuilder.d.ts.map +1 -0
- package/dist/baselines/diffmatch/documentBuilder.js +227 -0
- package/dist/baselines/diffmatch/documentBuilder.js.map +1 -0
- package/dist/baselines/diffmatch/paragraphAlignment.d.ts +75 -0
- package/dist/baselines/diffmatch/paragraphAlignment.d.ts.map +1 -0
- package/dist/baselines/diffmatch/paragraphAlignment.js +206 -0
- package/dist/baselines/diffmatch/paragraphAlignment.js.map +1 -0
- package/dist/baselines/diffmatch/pipeline.d.ts +33 -0
- package/dist/baselines/diffmatch/pipeline.d.ts.map +1 -0
- package/dist/baselines/diffmatch/pipeline.js +84 -0
- package/dist/baselines/diffmatch/pipeline.js.map +1 -0
- package/dist/baselines/diffmatch/runDiff.d.ts +53 -0
- package/dist/baselines/diffmatch/runDiff.d.ts.map +1 -0
- package/dist/baselines/diffmatch/runDiff.js +253 -0
- package/dist/baselines/diffmatch/runDiff.js.map +1 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.d.ts +64 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.d.ts.map +1 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.js +178 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.js.map +1 -0
- package/dist/baselines/diffmatch/xmlParser.d.ts +45 -0
- package/dist/baselines/diffmatch/xmlParser.d.ts.map +1 -0
- package/dist/baselines/diffmatch/xmlParser.js +344 -0
- package/dist/baselines/diffmatch/xmlParser.js.map +1 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts +51 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts.map +1 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.js +83 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.js.map +1 -0
- package/dist/baselines/wmlcomparer/DotnetCli.d.ts +40 -0
- package/dist/baselines/wmlcomparer/DotnetCli.d.ts.map +1 -0
- package/dist/baselines/wmlcomparer/DotnetCli.js +135 -0
- package/dist/baselines/wmlcomparer/DotnetCli.js.map +1 -0
- package/dist/benchmark/metrics.d.ts +72 -0
- package/dist/benchmark/metrics.d.ts.map +1 -0
- package/dist/benchmark/metrics.js +45 -0
- package/dist/benchmark/metrics.js.map +1 -0
- package/dist/benchmark/reporter.d.ts +23 -0
- package/dist/benchmark/reporter.d.ts.map +1 -0
- package/dist/benchmark/reporter.js +147 -0
- package/dist/benchmark/reporter.js.map +1 -0
- package/dist/benchmark/runner.d.ts +30 -0
- package/dist/benchmark/runner.d.ts.map +1 -0
- package/dist/benchmark/runner.js +233 -0
- package/dist/benchmark/runner.js.map +1 -0
- package/dist/cli/compare-two.d.ts +28 -0
- package/dist/cli/compare-two.d.ts.map +1 -0
- package/dist/cli/compare-two.js +110 -0
- package/dist/cli/compare-two.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +21 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core-types.d.ts +296 -0
- package/dist/core-types.d.ts.map +1 -0
- package/dist/core-types.js +122 -0
- package/dist/core-types.js.map +1 -0
- package/dist/footnotes.d.ts +144 -0
- package/dist/footnotes.d.ts.map +1 -0
- package/dist/footnotes.js +291 -0
- package/dist/footnotes.js.map +1 -0
- package/dist/format-detection.d.ts +120 -0
- package/dist/format-detection.d.ts.map +1 -0
- package/dist/format-detection.js +338 -0
- package/dist/format-detection.js.map +1 -0
- package/dist/index.d.ts +177 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +55 -0
- package/dist/index.js.map +1 -0
- package/dist/integration/output-artifacts.d.ts +6 -0
- package/dist/integration/output-artifacts.d.ts.map +1 -0
- package/dist/integration/output-artifacts.js +30 -0
- package/dist/integration/output-artifacts.js.map +1 -0
- package/dist/move-detection.d.ts +211 -0
- package/dist/move-detection.d.ts.map +1 -0
- package/dist/move-detection.js +391 -0
- package/dist/move-detection.js.map +1 -0
- package/dist/numbering.d.ts +136 -0
- package/dist/numbering.d.ts.map +1 -0
- package/dist/numbering.js +446 -0
- package/dist/numbering.js.map +1 -0
- package/dist/primitives/accept_changes.d.ts +30 -0
- package/dist/primitives/accept_changes.d.ts.map +1 -0
- package/dist/primitives/accept_changes.js +241 -0
- package/dist/primitives/accept_changes.js.map +1 -0
- package/dist/primitives/bookmarks.d.ts +12 -0
- package/dist/primitives/bookmarks.d.ts.map +1 -0
- package/dist/primitives/bookmarks.js +248 -0
- package/dist/primitives/bookmarks.js.map +1 -0
- package/dist/primitives/comments.d.ts +88 -0
- package/dist/primitives/comments.d.ts.map +1 -0
- package/dist/primitives/comments.js +703 -0
- package/dist/primitives/comments.js.map +1 -0
- package/dist/primitives/document.d.ts +168 -0
- package/dist/primitives/document.d.ts.map +1 -0
- package/dist/primitives/document.js +532 -0
- package/dist/primitives/document.js.map +1 -0
- package/dist/primitives/document_view.d.ts +93 -0
- package/dist/primitives/document_view.d.ts.map +1 -0
- package/dist/primitives/document_view.js +722 -0
- package/dist/primitives/document_view.js.map +1 -0
- package/dist/primitives/dom-helpers.d.ts +94 -0
- package/dist/primitives/dom-helpers.d.ts.map +1 -0
- package/dist/primitives/dom-helpers.js +219 -0
- package/dist/primitives/dom-helpers.js.map +1 -0
- package/dist/primitives/errors.d.ts +7 -0
- package/dist/primitives/errors.d.ts.map +1 -0
- package/dist/primitives/errors.js +10 -0
- package/dist/primitives/errors.js.map +1 -0
- package/dist/primitives/extract_revisions.d.ts +50 -0
- package/dist/primitives/extract_revisions.d.ts.map +1 -0
- package/dist/primitives/extract_revisions.js +340 -0
- package/dist/primitives/extract_revisions.js.map +1 -0
- package/dist/primitives/footnotes.d.ts +37 -0
- package/dist/primitives/footnotes.d.ts.map +1 -0
- package/dist/primitives/footnotes.js +552 -0
- package/dist/primitives/footnotes.js.map +1 -0
- package/dist/primitives/formatting_tags.d.ts +30 -0
- package/dist/primitives/formatting_tags.d.ts.map +1 -0
- package/dist/primitives/formatting_tags.js +217 -0
- package/dist/primitives/formatting_tags.js.map +1 -0
- package/dist/primitives/index.d.ts +26 -0
- package/dist/primitives/index.d.ts.map +1 -0
- package/dist/primitives/index.js +26 -0
- package/dist/primitives/index.js.map +1 -0
- package/dist/primitives/layout.d.ts +53 -0
- package/dist/primitives/layout.d.ts.map +1 -0
- package/dist/primitives/layout.js +178 -0
- package/dist/primitives/layout.js.map +1 -0
- package/dist/primitives/list_labels.d.ts +19 -0
- package/dist/primitives/list_labels.d.ts.map +1 -0
- package/dist/primitives/list_labels.js +57 -0
- package/dist/primitives/list_labels.js.map +1 -0
- package/dist/primitives/matching.d.ts +17 -0
- package/dist/primitives/matching.d.ts.map +1 -0
- package/dist/primitives/matching.js +144 -0
- package/dist/primitives/matching.js.map +1 -0
- package/dist/primitives/merge_runs.d.ts +23 -0
- package/dist/primitives/merge_runs.d.ts.map +1 -0
- package/dist/primitives/merge_runs.js +195 -0
- package/dist/primitives/merge_runs.js.map +1 -0
- package/dist/primitives/namespaces.d.ts +90 -0
- package/dist/primitives/namespaces.d.ts.map +1 -0
- package/dist/primitives/namespaces.js +107 -0
- package/dist/primitives/namespaces.js.map +1 -0
- package/dist/primitives/numbering.d.ts +27 -0
- package/dist/primitives/numbering.d.ts.map +1 -0
- package/dist/primitives/numbering.js +182 -0
- package/dist/primitives/numbering.js.map +1 -0
- package/dist/primitives/prevent_double_elevation.d.ts +18 -0
- package/dist/primitives/prevent_double_elevation.d.ts.map +1 -0
- package/dist/primitives/prevent_double_elevation.js +190 -0
- package/dist/primitives/prevent_double_elevation.js.map +1 -0
- package/dist/primitives/reject_changes.d.ts +27 -0
- package/dist/primitives/reject_changes.d.ts.map +1 -0
- package/dist/primitives/reject_changes.js +371 -0
- package/dist/primitives/reject_changes.js.map +1 -0
- package/dist/primitives/relationships.d.ts +7 -0
- package/dist/primitives/relationships.d.ts.map +1 -0
- package/dist/primitives/relationships.js +24 -0
- package/dist/primitives/relationships.js.map +1 -0
- package/dist/primitives/semantic_tags.d.ts +32 -0
- package/dist/primitives/semantic_tags.d.ts.map +1 -0
- package/dist/primitives/semantic_tags.js +139 -0
- package/dist/primitives/semantic_tags.js.map +1 -0
- package/dist/primitives/simplify_redlines.d.ts +19 -0
- package/dist/primitives/simplify_redlines.d.ts.map +1 -0
- package/dist/primitives/simplify_redlines.js +94 -0
- package/dist/primitives/simplify_redlines.js.map +1 -0
- package/dist/primitives/styles.d.ts +36 -0
- package/dist/primitives/styles.d.ts.map +1 -0
- package/dist/primitives/styles.js +190 -0
- package/dist/primitives/styles.js.map +1 -0
- package/dist/primitives/text.d.ts +27 -0
- package/dist/primitives/text.d.ts.map +1 -0
- package/dist/primitives/text.js +416 -0
- package/dist/primitives/text.js.map +1 -0
- package/dist/primitives/validate_document.d.ts +24 -0
- package/dist/primitives/validate_document.d.ts.map +1 -0
- package/dist/primitives/validate_document.js +147 -0
- package/dist/primitives/validate_document.js.map +1 -0
- package/dist/primitives/xml.d.ts +5 -0
- package/dist/primitives/xml.d.ts.map +1 -0
- package/dist/primitives/xml.js +19 -0
- package/dist/primitives/xml.js.map +1 -0
- package/dist/primitives/zip.d.ts +25 -0
- package/dist/primitives/zip.d.ts.map +1 -0
- package/dist/primitives/zip.js +78 -0
- package/dist/primitives/zip.js.map +1 -0
- package/dist/shared/docx/DocxArchive.d.ts +94 -0
- package/dist/shared/docx/DocxArchive.d.ts.map +1 -0
- package/dist/shared/docx/DocxArchive.js +169 -0
- package/dist/shared/docx/DocxArchive.js.map +1 -0
- package/dist/shared/ooxml/namespaces.d.ts +149 -0
- package/dist/shared/ooxml/namespaces.d.ts.map +1 -0
- package/dist/shared/ooxml/namespaces.js +224 -0
- package/dist/shared/ooxml/namespaces.js.map +1 -0
- package/dist/shared/ooxml/types.d.ts +136 -0
- package/dist/shared/ooxml/types.d.ts.map +1 -0
- package/dist/shared/ooxml/types.js +7 -0
- package/dist/shared/ooxml/types.js.map +1 -0
- package/package.json +63 -6
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Atomizer Pipeline
|
|
3
|
+
*
|
|
4
|
+
* Main orchestration for the atomizer-based document comparison.
|
|
5
|
+
* Integrates atomization, LCS comparison, move detection, format detection,
|
|
6
|
+
* and document reconstruction.
|
|
7
|
+
*/
|
|
8
|
+
import { DocxArchive } from '../../shared/docx/DocxArchive.js';
|
|
9
|
+
import { DEFAULT_MOVE_DETECTION_SETTINGS, DEFAULT_FORMAT_DETECTION_SETTINGS, CorrelationStatus, } from '../../core-types.js';
|
|
10
|
+
import { atomizeTree, assignParagraphIndices } from '../../atomizer.js';
|
|
11
|
+
import { detectMovesInAtomList } from '../../move-detection.js';
|
|
12
|
+
import { detectFormatChangesInAtomList } from '../../format-detection.js';
|
|
13
|
+
import { parseDocumentXml, findBody, backfillParentReferences, } from './xmlToWmlElement.js';
|
|
14
|
+
import { findAllByTagName, getLeafText } from '../../primitives/index.js';
|
|
15
|
+
import { createMergedAtomList, assignUnifiedParagraphIndices, } from './atomLcs.js';
|
|
16
|
+
import { hierarchicalCompare, markHierarchicalCorrelationStatus, } from './hierarchicalLcs.js';
|
|
17
|
+
import { reconstructDocument, computeReconstructionStats, } from './documentReconstructor.js';
|
|
18
|
+
import { modifyRevisedDocument } from './inPlaceModifier.js';
|
|
19
|
+
import { acceptAllChanges, rejectAllChanges, extractTextWithParagraphs, compareTexts, } from './trackChangesAcceptorAst.js';
|
|
20
|
+
import { virtualizeNumberingLabels, DEFAULT_NUMBERING_OPTIONS, } from './numberingIntegration.js';
|
|
21
|
+
import { premergeAdjacentRuns } from './premergeRuns.js';
|
|
22
|
+
/**
 * Shallow, order-sensitive equality check for two arrays.
 *
 * Elements are compared pairwise with strict equality (`!==`), so values of
 * different types never match and `NaN` never equals itself.
 *
 * @param a - First array.
 * @param b - Second array.
 * @returns `true` when both arrays have the same length and identical
 *          elements at every index; `false` otherwise.
 */
function arraysEqual(a, b) {
    if (a.length !== b.length) {
        return false;
    }
    let idx = 0;
    while (idx < a.length) {
        if (a[idx] !== b[idx]) {
            return false;
        }
        idx += 1;
    }
    return true;
}
|
|
31
|
+
/**
 * Gathers the bookmark names targeted by REF / PAGEREF field instructions.
 *
 * Every `w:instrText` node under `root` is scanned; the first
 * whitespace-delimited token (stopping at a backslash) that follows a `REF`
 * or `PAGEREF` keyword is taken as the referenced name.
 *
 * @param root - Parsed document tree to search.
 * @returns De-duplicated names, sorted with the default array sort.
 */
function collectReferencedBookmarkNames(root) {
    const fieldRefPattern = /\b(?:PAGEREF|REF)\s+([^\s\\]+)/g;
    const seenNames = new Set();
    for (const instrNode of findAllByTagName(root, 'w:instrText')) {
        // A node without leaf text is treated as an empty instruction.
        const instruction = getLeafText(instrNode) ?? '';
        for (const hit of instruction.matchAll(fieldRefPattern)) {
            const target = hit[1]?.trim();
            if (!target) {
                continue;
            }
            seenNames.add(target);
        }
    }
    return Array.from(seenNames).sort();
}
|
|
44
|
+
/**
 * Parses a document.xml string and summarizes its bookmark structure.
 *
 * Collected data:
 * - unique `w:id` values of `w:bookmarkStart` / `w:bookmarkEnd` elements,
 * - unique `w:name` values of `w:bookmarkStart` elements,
 * - ids / names that occur more than once,
 * - names referenced by REF / PAGEREF fields, and which of them have no
 *   matching bookmark start name,
 * - start ids without an end and end ids without a start.
 *
 * A `w:bookmarkStart` or `w:bookmarkEnd` lacking a `w:id` is ignored
 * entirely (its name is not recorded either).
 *
 * @param documentXml - Raw XML of the main document part.
 * @returns Diagnostics object; every list is sorted with the default array sort.
 */
function collectBookmarkDiagnostics(documentXml) {
    const root = parseDocumentXml(documentXml);
    const seenStartIds = new Set();
    const seenEndIds = new Set();
    const seenStartNames = new Set();
    const repeatedStartIds = new Set();
    const repeatedEndIds = new Set();
    const repeatedStartNames = new Set();
    // First sighting goes to `seen`; any later sighting is recorded as a repeat.
    const track = (value, seen, repeated) => {
        if (seen.has(value)) {
            repeated.add(value);
        }
        else {
            seen.add(value);
        }
    };
    const toSortedList = (set) => Array.from(set).sort();
    for (const startNode of findAllByTagName(root, 'w:bookmarkStart')) {
        const startId = startNode.getAttribute('w:id');
        if (!startId) {
            continue;
        }
        track(startId, seenStartIds, repeatedStartIds);
        const bookmarkName = startNode.getAttribute('w:name');
        if (bookmarkName) {
            track(bookmarkName, seenStartNames, repeatedStartNames);
        }
    }
    for (const endNode of findAllByTagName(root, 'w:bookmarkEnd')) {
        const endId = endNode.getAttribute('w:id');
        if (!endId) {
            continue;
        }
        track(endId, seenEndIds, repeatedEndIds);
    }
    const startIds = toSortedList(seenStartIds);
    const endIds = toSortedList(seenEndIds);
    const startNames = toSortedList(seenStartNames);
    const referencedBookmarkNames = collectReferencedBookmarkNames(root);
    const unresolvedReferenceNames = referencedBookmarkNames
        .filter((refName) => !seenStartNames.has(refName))
        .sort();
    const unmatchedStartIds = startIds.filter((candidate) => !seenEndIds.has(candidate));
    const unmatchedEndIds = endIds.filter((candidate) => !seenStartIds.has(candidate));
    return {
        startIds,
        endIds,
        startNames,
        duplicateStartNames: toSortedList(repeatedStartNames),
        referencedBookmarkNames,
        unresolvedReferenceNames,
        duplicateStartIds: toSortedList(repeatedStartIds),
        duplicateEndIds: toSortedList(repeatedEndIds),
        unmatchedStartIds,
        unmatchedEndIds,
    };
}
|
|
99
|
+
/**
 * Decides whether two bookmark diagnostics describe the same bookmark
 * semantics. The comparison is semantic rather than byte/ID exact:
 * - The full `startIds` / `endIds` lists are NOT compared, so bookmark IDs
 *   may be renumbered by reconstruction/Word and still pass.
 * - Bookmark names and field-reference targets must be identical.
 * - Structural integrity lists (duplicate and unmatched ids) must be
 *   identical; note these lists are compared element by element.
 *
 * @param expected - Diagnostics of the reference document.
 * @param actual - Diagnostics of the round-tripped document.
 * @returns `true` when every compared list matches exactly.
 */
function bookmarkDiagnosticsSemanticallyEqual(expected, actual) {
    // Checked in this order; evaluation stops at the first mismatch.
    const comparedFields = [
        'startNames',
        'duplicateStartNames',
        'referencedBookmarkNames',
        'unresolvedReferenceNames',
        'duplicateStartIds',
        'duplicateEndIds',
        'unmatchedStartIds',
        'unmatchedEndIds',
    ];
    return comparedFields.every((field) => arraysEqual(expected[field], actual[field]));
}
|
|
115
|
+
/**
 * Computes the two-way set difference between two lists of ids or names.
 *
 * @param expected - Values that should be present.
 * @param actual - Values that are actually present.
 * @returns `missing`: entries of `expected` not found in `actual`;
 *          `unexpected`: entries of `actual` not found in `expected`.
 *          Input order (and any repeated entries) is preserved.
 */
function diffIds(expected, actual) {
    const presentInActual = new Set(actual);
    const presentInExpected = new Set(expected);
    return {
        missing: expected.filter((value) => !presentInActual.has(value)),
        unexpected: actual.filter((value) => !presentInExpected.has(value)),
    };
}
|
|
122
|
+
/**
 * Builds a detailed description of how two paragraph-delimited texts differ.
 *
 * Both texts are split on '\n' and walked in parallel to locate the first
 * line that differs; a line missing on one side is treated as ''.
 *
 * @param expectedText - Reference text.
 * @param actualText - Text produced by the round trip.
 * @returns Lengths reported by `compareTexts`, the index and content of the
 *          first differing paragraph (-1 and '' when none differs), and up
 *          to three entries of the `compareTexts` difference list.
 */
function buildTextMismatchDetails(expectedText, actualText) {
    const comparison = compareTexts(expectedText, actualText);
    const expectedLines = expectedText.split('\n');
    const actualLines = actualText.split('\n');
    const lineAt = (lines, position) => lines[position] ?? '';
    const longest = Math.max(expectedLines.length, actualLines.length);
    let firstDifferingParagraphIndex = -1;
    let cursor = 0;
    while (cursor < longest && firstDifferingParagraphIndex < 0) {
        if (lineAt(expectedLines, cursor) !== lineAt(actualLines, cursor)) {
            firstDifferingParagraphIndex = cursor;
        }
        cursor += 1;
    }
    const found = firstDifferingParagraphIndex >= 0;
    return {
        expectedLength: comparison.expectedLength,
        actualLength: comparison.actualLength,
        firstDifferingParagraphIndex,
        expectedParagraph: found ? lineAt(expectedLines, firstDifferingParagraphIndex) : '',
        actualParagraph: found ? lineAt(actualLines, firstDifferingParagraphIndex) : '',
        differenceSample: comparison.differences.slice(0, 3),
    };
}
|
|
143
|
+
/**
 * Builds a detailed description of how two bookmark diagnostics differ.
 *
 * Name and id lists are reported as missing/unexpected deltas (via
 * `diffIds`); duplicate and unmatched lists are reported verbatim for both
 * sides.
 *
 * @param expected - Diagnostics of the reference document.
 * @param actual - Diagnostics of the round-tripped document.
 * @returns Object combining the deltas and the side-by-side lists.
 */
function buildBookmarkMismatchDetails(expected, actual) {
    const deltas = {
        startNames: diffIds(expected.startNames, actual.startNames),
        referencedBookmarkNames: diffIds(expected.referencedBookmarkNames, actual.referencedBookmarkNames),
        unresolvedReferenceNames: diffIds(expected.unresolvedReferenceNames, actual.unresolvedReferenceNames),
        startIds: diffIds(expected.startIds, actual.startIds),
        endIds: diffIds(expected.endIds, actual.endIds),
    };
    const sideBySide = {
        expectedDuplicateStartNames: expected.duplicateStartNames,
        actualDuplicateStartNames: actual.duplicateStartNames,
        expectedDuplicateStartIds: expected.duplicateStartIds,
        actualDuplicateStartIds: actual.duplicateStartIds,
        expectedDuplicateEndIds: expected.duplicateEndIds,
        actualDuplicateEndIds: actual.duplicateEndIds,
        expectedUnmatchedStartIds: expected.unmatchedStartIds,
        actualUnmatchedStartIds: actual.unmatchedStartIds,
        expectedUnmatchedEndIds: expected.unmatchedEndIds,
        actualUnmatchedEndIds: actual.unmatchedEndIds,
    };
    return { ...deltas, ...sideBySide };
}
|
|
162
|
+
/**
 * Condenses a missing/unexpected delta into counts plus one sample of each.
 *
 * @param delta - Object with `missing` and `unexpected` arrays.
 * @returns Counts of both lists and their first entries (`undefined` when
 *          the corresponding list is empty).
 */
function summarizeIdDelta(delta) {
    const { missing, unexpected } = delta;
    return {
        missingCount: missing.length,
        unexpectedCount: unexpected.length,
        firstMissing: missing[0],
        firstUnexpected: unexpected[0],
    };
}
|
|
170
|
+
/**
 * Shortens a string for inclusion in a failure summary.
 *
 * Strings no longer than `maxLength` UTF-16 code units are returned
 * unchanged. Longer strings are cut and suffixed with '...'. If the cut
 * would fall between the two halves of a surrogate pair, it is moved back
 * by one code unit so the result never ends in a lone high surrogate.
 *
 * @param value - Text to shorten.
 * @param maxLength - Maximum number of code units kept before the ellipsis
 *                    (default 160).
 * @returns The original string, or its truncated prefix followed by '...'.
 */
function truncateForSummary(value, maxLength = 160) {
    if (value.length <= maxLength) {
        return value;
    }
    let cut = maxLength;
    if (cut > 0) {
        const before = value.charCodeAt(cut - 1);
        const after = value.charCodeAt(cut);
        const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        if (splitsPair) {
            // Drop the dangling high surrogate rather than emit half a character.
            cut -= 1;
        }
    }
    return `${value.slice(0, cut)}...`;
}
|
|
176
|
+
/**
 * Produces a compact view of a text mismatch.
 *
 * @param details - Text mismatch details (first differing index, the two
 *                  paragraphs, and a difference sample list).
 * @returns The first differing index, both paragraphs truncated via
 *          `truncateForSummary`, and the first sampled difference
 *          ('No diff sample' when the sample has no first entry).
 */
function summarizeTextMismatch(details) {
    const { firstDifferingParagraphIndex, differenceSample } = details;
    const expectedParagraph = truncateForSummary(details.expectedParagraph);
    const actualParagraph = truncateForSummary(details.actualParagraph);
    const firstDifference = differenceSample[0] ?? 'No diff sample';
    return {
        firstDifferingParagraphIndex,
        expectedParagraph,
        actualParagraph,
        firstDifference,
    };
}
|
|
184
|
+
/**
 * Produces a compact view of a bookmark mismatch.
 *
 * Each name/id delta is reduced with `summarizeIdDelta`. Unmatched
 * start/end information is taken from the "actual" side only.
 *
 * @param details - Bookmark mismatch details.
 * @returns Summarized deltas plus counts and first samples of the actual
 *          unmatched start and end ids.
 */
function summarizeBookmarkMismatch(details) {
    const danglingStarts = details.actualUnmatchedStartIds;
    const danglingEnds = details.actualUnmatchedEndIds;
    return {
        startNames: summarizeIdDelta(details.startNames),
        referencedBookmarkNames: summarizeIdDelta(details.referencedBookmarkNames),
        unresolvedReferenceNames: summarizeIdDelta(details.unresolvedReferenceNames),
        startIds: summarizeIdDelta(details.startIds),
        endIds: summarizeIdDelta(details.endIds),
        unmatchedStartCount: danglingStarts.length,
        unmatchedEndCount: danglingEnds.length,
        firstUnmatchedStartId: danglingStarts[0],
        firstUnmatchedEndId: danglingEnds[0],
    };
}
|
|
197
|
+
/**
 * Converts per-check failure details into their compact summaries.
 *
 * Only checks present (truthy) in `failureDetails` appear in the result.
 *
 * @param failureDetails - Optional object keyed by check name
 *                         (acceptText, rejectText, acceptBookmarks,
 *                         rejectBookmarks).
 * @returns Summary object keyed the same way, or `undefined` when no
 *          details were supplied or none of the checks has details.
 */
function buildFailureSummary(failureDetails) {
    if (!failureDetails) {
        return undefined;
    }
    // Listed in the order the summary keys are populated.
    const summarizers = [
        ['acceptText', summarizeTextMismatch],
        ['rejectText', summarizeTextMismatch],
        ['acceptBookmarks', summarizeBookmarkMismatch],
        ['rejectBookmarks', summarizeBookmarkMismatch],
    ];
    const summary = {};
    for (const [checkName, summarize] of summarizers) {
        const detail = failureDetails[checkName];
        if (detail) {
            summary[checkName] = summarize(detail);
        }
    }
    if (Object.keys(summary).length === 0) {
        return undefined;
    }
    return summary;
}
|
|
216
|
+
/**
 * Runs the round-trip safety checks on a candidate track-changes document.
 *
 * The candidate is run through `acceptAllChanges` and `rejectAllChanges`;
 * the accepted result is compared against the revised document and the
 * rejected result against the original, both for text (normalized
 * comparison from `compareTexts`) and for bookmark semantics.
 *
 * @param originalTextForRoundTrip - Paragraph text extracted from the original.
 * @param revisedTextForRoundTrip - Paragraph text extracted from the revised.
 * @param originalBookmarkDiagnostics - Bookmark diagnostics of the original.
 * @param revisedBookmarkDiagnostics - Bookmark diagnostics of the revised.
 * @param candidateXml - document.xml of the candidate comparison output.
 * @returns `safe` flag, per-check booleans, names of failed checks, and —
 *          only when something failed — failure details and their summary.
 */
function evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip, originalBookmarkDiagnostics, revisedBookmarkDiagnostics, candidateXml) {
    const acceptedXml = acceptAllChanges(candidateXml);
    const rejectedXml = rejectAllChanges(candidateXml);
    const acceptedText = extractTextWithParagraphs(acceptedXml);
    const rejectedText = extractTextWithParagraphs(rejectedXml);
    const acceptedBookmarks = collectBookmarkDiagnostics(acceptedXml);
    const rejectedBookmarks = collectBookmarkDiagnostics(rejectedXml);
    const acceptTextMatches = compareTexts(revisedTextForRoundTrip, acceptedText).normalizedIdentical;
    const rejectTextMatches = compareTexts(originalTextForRoundTrip, rejectedText).normalizedIdentical;
    const checks = {
        acceptText: acceptTextMatches,
        rejectText: rejectTextMatches,
        acceptBookmarks: bookmarkDiagnosticsSemanticallyEqual(revisedBookmarkDiagnostics, acceptedBookmarks),
        rejectBookmarks: bookmarkDiagnosticsSemanticallyEqual(originalBookmarkDiagnostics, rejectedBookmarks),
    };
    const failedChecks = Object.keys(checks).filter((checkName) => !checks[checkName]);
    // Details are only gathered for the checks that actually failed.
    const failureDetails = {};
    if (!checks.acceptText) {
        failureDetails.acceptText = buildTextMismatchDetails(revisedTextForRoundTrip, acceptedText);
    }
    if (!checks.rejectText) {
        failureDetails.rejectText = buildTextMismatchDetails(originalTextForRoundTrip, rejectedText);
    }
    if (!checks.acceptBookmarks) {
        failureDetails.acceptBookmarks = buildBookmarkMismatchDetails(revisedBookmarkDiagnostics, acceptedBookmarks);
    }
    if (!checks.rejectBookmarks) {
        failureDetails.rejectBookmarks = buildBookmarkMismatchDetails(originalBookmarkDiagnostics, rejectedBookmarks);
    }
    const anyFailed = failedChecks.length > 0;
    return {
        safe: failedChecks.length === 0,
        checks,
        failedChecks,
        failureDetails: anyFailed ? failureDetails : undefined,
        failureSummary: anyFailed ? buildFailureSummary(failureDetails) : undefined,
    };
}
|
|
255
|
+
/**
|
|
256
|
+
* Compare two DOCX documents using the atomizer-based approach.
|
|
257
|
+
*
|
|
258
|
+
* Pipeline steps:
|
|
259
|
+
* 1. Load DOCX archives
|
|
260
|
+
* 2. Extract document.xml
|
|
261
|
+
* 3. Parse to WmlElement trees
|
|
262
|
+
* 4. Atomize both documents
|
|
263
|
+
* 5. (Optional) Apply numbering virtualization
|
|
264
|
+
* 6. Run LCS on atom hashes
|
|
265
|
+
* 7. Mark correlation status
|
|
266
|
+
* 8. Run move detection
|
|
267
|
+
* 9. Run format detection
|
|
268
|
+
* 10. Reconstruct document with track changes
|
|
269
|
+
* 11. Save and return result
|
|
270
|
+
*
|
|
271
|
+
* @param original - Original document as Buffer
|
|
272
|
+
* @param revised - Revised document as Buffer
|
|
273
|
+
* @param options - Pipeline options
|
|
274
|
+
* @returns Comparison result with track changes document
|
|
275
|
+
*/
|
|
276
|
+
export async function compareDocumentsAtomizer(original, revised, options = {}) {
|
|
277
|
+
const { author = 'Comparison', date = new Date(), moveDetection = {}, formatDetection = {}, numbering = {}, premergeRuns = false, reconstructionMode = 'rebuild', } = options;
|
|
278
|
+
// Merge settings with defaults
|
|
279
|
+
const moveSettings = {
|
|
280
|
+
...DEFAULT_MOVE_DETECTION_SETTINGS,
|
|
281
|
+
...moveDetection,
|
|
282
|
+
};
|
|
283
|
+
const formatSettings = {
|
|
284
|
+
...DEFAULT_FORMAT_DETECTION_SETTINGS,
|
|
285
|
+
...formatDetection,
|
|
286
|
+
};
|
|
287
|
+
const numberingSettings = {
|
|
288
|
+
...DEFAULT_NUMBERING_OPTIONS,
|
|
289
|
+
...numbering,
|
|
290
|
+
};
|
|
291
|
+
// Step 1: Load DOCX archives
|
|
292
|
+
const originalArchive = await DocxArchive.load(original);
|
|
293
|
+
const revisedArchive = await DocxArchive.load(revised);
|
|
294
|
+
// Step 2: Extract document.xml
|
|
295
|
+
const originalXml = await originalArchive.getDocumentXml();
|
|
296
|
+
const revisedXml = await revisedArchive.getDocumentXml();
|
|
297
|
+
// Extract numbering.xml if available
|
|
298
|
+
const originalNumberingXml = await originalArchive.getNumberingXml() ?? undefined;
|
|
299
|
+
const revisedNumberingXml = await revisedArchive.getNumberingXml() ?? undefined;
|
|
300
|
+
const originalPart = {
|
|
301
|
+
uri: 'word/document.xml',
|
|
302
|
+
contentType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml',
|
|
303
|
+
};
|
|
304
|
+
const revisedPart = {
|
|
305
|
+
uri: 'word/document.xml',
|
|
306
|
+
contentType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml',
|
|
307
|
+
};
|
|
308
|
+
const originalTextForRoundTrip = extractTextWithParagraphs(originalXml);
|
|
309
|
+
const revisedTextForRoundTrip = extractTextWithParagraphs(revisedXml);
|
|
310
|
+
const originalBookmarkDiagnostics = collectBookmarkDiagnostics(originalXml);
|
|
311
|
+
const revisedBookmarkDiagnostics = collectBookmarkDiagnostics(revisedXml);
|
|
312
|
+
const runComparisonPass = (atomizeOptions, outputMode) => {
|
|
313
|
+
// Parse fresh trees for each pass because inplace reconstruction mutates revised AST.
|
|
314
|
+
const originalTree = parseDocumentXml(originalXml);
|
|
315
|
+
const revisedTree = parseDocumentXml(revisedXml);
|
|
316
|
+
backfillParentReferences(originalTree);
|
|
317
|
+
backfillParentReferences(revisedTree);
|
|
318
|
+
const originalBody = findBody(originalTree);
|
|
319
|
+
const revisedBody = findBody(revisedTree);
|
|
320
|
+
if (!originalBody || !revisedBody) {
|
|
321
|
+
throw new Error('Could not find w:body in one or both documents');
|
|
322
|
+
}
|
|
323
|
+
if (premergeRuns) {
|
|
324
|
+
premergeAdjacentRuns(originalBody);
|
|
325
|
+
premergeAdjacentRuns(revisedBody);
|
|
326
|
+
}
|
|
327
|
+
const { atoms: originalAtoms } = atomizeTree(originalBody, [], originalPart, atomizeOptions);
|
|
328
|
+
const { atoms: revisedAtoms } = atomizeTree(revisedBody, [], revisedPart, atomizeOptions);
|
|
329
|
+
// Assign paragraph indices for proper grouping during reconstruction
|
|
330
|
+
assignParagraphIndices(originalAtoms);
|
|
331
|
+
assignParagraphIndices(revisedAtoms);
|
|
332
|
+
// Step 5: Apply numbering virtualization (optional)
|
|
333
|
+
if (numberingSettings.enabled) {
|
|
334
|
+
virtualizeNumberingLabels(originalAtoms, originalNumberingXml, numberingSettings);
|
|
335
|
+
virtualizeNumberingLabels(revisedAtoms, revisedNumberingXml, numberingSettings);
|
|
336
|
+
}
|
|
337
|
+
// Step 6: Run hierarchical LCS (paragraph-level first, then atom-level within)
|
|
338
|
+
const lcsResult = hierarchicalCompare(originalAtoms, revisedAtoms);
|
|
339
|
+
// Step 7: Mark correlation status using hierarchical result
|
|
340
|
+
markHierarchicalCorrelationStatus(originalAtoms, revisedAtoms, lcsResult);
|
|
341
|
+
// Step 8: Run move detection
|
|
342
|
+
if (moveSettings.detectMoves) {
|
|
343
|
+
// Create a combined list for move detection
|
|
344
|
+
// Move detection looks at the revised atoms with Inserted status
|
|
345
|
+
// and original atoms with Deleted status
|
|
346
|
+
const allAtoms = [...originalAtoms, ...revisedAtoms];
|
|
347
|
+
detectMovesInAtomList(allAtoms, moveSettings);
|
|
348
|
+
}
|
|
349
|
+
// Step 9: Run format detection
|
|
350
|
+
if (formatSettings.detectFormatChanges) {
|
|
351
|
+
// Format detection operates on the revised atoms that are Equal
|
|
352
|
+
detectFormatChangesInAtomList(revisedAtoms, formatSettings);
|
|
353
|
+
}
|
|
354
|
+
// Step 10: Create merged atom list for reconstruction
|
|
355
|
+
const mergedAtoms = createMergedAtomList(originalAtoms, revisedAtoms, lcsResult);
|
|
356
|
+
// Step 10b: Assign unified paragraph indices to handle atoms from different trees
|
|
357
|
+
assignUnifiedParagraphIndices(originalAtoms, revisedAtoms, mergedAtoms, lcsResult);
|
|
358
|
+
// Step 11: Reconstruct document with track changes
|
|
359
|
+
let newDocumentXml;
|
|
360
|
+
if (outputMode === 'inplace') {
|
|
361
|
+
// In-place mode: modify the revised AST directly, producing revised-based output.
|
|
362
|
+
newDocumentXml = modifyRevisedDocument(revisedTree, originalAtoms, revisedAtoms, mergedAtoms, { author, date });
|
|
363
|
+
}
|
|
364
|
+
else {
|
|
365
|
+
// Rebuild mode: reconstruct from atoms using original as the structural base.
|
|
366
|
+
newDocumentXml = reconstructDocument(mergedAtoms, originalXml, { author, date });
|
|
367
|
+
}
|
|
368
|
+
return { mergedAtoms, newDocumentXml, outputMode };
|
|
369
|
+
};
|
|
370
|
+
const evaluateRoundTripSafety = (candidateXml) => evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip, originalBookmarkDiagnostics, revisedBookmarkDiagnostics, candidateXml);
|
|
371
|
+
let comparisonResult;
|
|
372
|
+
let fallbackReason;
|
|
373
|
+
let fallbackDiagnostics;
|
|
374
|
+
if (reconstructionMode === 'inplace') {
|
|
375
|
+
// Adaptive strategy:
|
|
376
|
+
// 1) Try no-cross-run passes first (higher run anchoring fidelity).
|
|
377
|
+
// 2) If safety fails, retry with cross-run merging to handle run-fragmented docs.
|
|
378
|
+
// 3) If still unsafe, reuse rebuild reconstruction as a hard safety fallback.
|
|
379
|
+
const inplacePasses = [
|
|
380
|
+
{
|
|
381
|
+
pass: 'inplace_word_split',
|
|
382
|
+
atomizeOptions: {
|
|
383
|
+
cloneLeafNodes: true,
|
|
384
|
+
mergeAcrossRuns: false,
|
|
385
|
+
mergePunctuationAcrossRuns: false,
|
|
386
|
+
splitTextIntoWords: true,
|
|
387
|
+
},
|
|
388
|
+
},
|
|
389
|
+
{
|
|
390
|
+
pass: 'inplace_run_level',
|
|
391
|
+
atomizeOptions: {
|
|
392
|
+
cloneLeafNodes: true,
|
|
393
|
+
mergeAcrossRuns: false,
|
|
394
|
+
mergePunctuationAcrossRuns: false,
|
|
395
|
+
splitTextIntoWords: false,
|
|
396
|
+
},
|
|
397
|
+
},
|
|
398
|
+
{
|
|
399
|
+
pass: 'inplace_word_split_cross_run',
|
|
400
|
+
atomizeOptions: {
|
|
401
|
+
cloneLeafNodes: true,
|
|
402
|
+
mergeAcrossRuns: true,
|
|
403
|
+
mergePunctuationAcrossRuns: true,
|
|
404
|
+
splitTextIntoWords: true,
|
|
405
|
+
},
|
|
406
|
+
},
|
|
407
|
+
{
|
|
408
|
+
pass: 'inplace_run_level_cross_run',
|
|
409
|
+
atomizeOptions: {
|
|
410
|
+
cloneLeafNodes: true,
|
|
411
|
+
mergeAcrossRuns: true,
|
|
412
|
+
mergePunctuationAcrossRuns: true,
|
|
413
|
+
splitTextIntoWords: false,
|
|
414
|
+
},
|
|
415
|
+
},
|
|
416
|
+
];
|
|
417
|
+
const failedAttempts = [];
|
|
418
|
+
let selected;
|
|
419
|
+
for (const { pass, atomizeOptions } of inplacePasses) {
|
|
420
|
+
const candidate = runComparisonPass(atomizeOptions, 'inplace');
|
|
421
|
+
const safety = evaluateRoundTripSafety(candidate.newDocumentXml);
|
|
422
|
+
if (safety.safe) {
|
|
423
|
+
selected = candidate;
|
|
424
|
+
break;
|
|
425
|
+
}
|
|
426
|
+
failedAttempts.push({
|
|
427
|
+
pass,
|
|
428
|
+
checks: safety.checks,
|
|
429
|
+
failedChecks: safety.failedChecks,
|
|
430
|
+
failureDetails: safety.failureDetails,
|
|
431
|
+
firstDiffSummary: safety.failureSummary,
|
|
432
|
+
});
|
|
433
|
+
}
|
|
434
|
+
if (selected) {
|
|
435
|
+
comparisonResult = selected;
|
|
436
|
+
}
|
|
437
|
+
else {
|
|
438
|
+
comparisonResult = runComparisonPass(undefined, 'rebuild');
|
|
439
|
+
fallbackReason = 'round_trip_safety_check_failed';
|
|
440
|
+
fallbackDiagnostics = {
|
|
441
|
+
attempts: failedAttempts,
|
|
442
|
+
};
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
else {
|
|
446
|
+
comparisonResult = runComparisonPass(undefined, 'rebuild');
|
|
447
|
+
}
|
|
448
|
+
const { mergedAtoms, newDocumentXml } = comparisonResult;
|
|
449
|
+
// Step 12: Clone appropriate archive and update document.xml.
|
|
450
|
+
// Use the revised archive only for true inplace output.
|
|
451
|
+
const baseArchive = comparisonResult.outputMode === 'inplace' ? revisedArchive : originalArchive;
|
|
452
|
+
const resultArchive = await baseArchive.clone();
|
|
453
|
+
resultArchive.setDocumentXml(newDocumentXml);
|
|
454
|
+
// Step 13: Save result and compute stats
|
|
455
|
+
const resultBuffer = await resultArchive.save();
|
|
456
|
+
const stats = computeStats(mergedAtoms);
|
|
457
|
+
return {
|
|
458
|
+
document: resultBuffer,
|
|
459
|
+
stats,
|
|
460
|
+
engine: 'atomizer',
|
|
461
|
+
reconstructionModeRequested: reconstructionMode,
|
|
462
|
+
reconstructionModeUsed: comparisonResult.outputMode,
|
|
463
|
+
fallbackReason,
|
|
464
|
+
fallbackDiagnostics,
|
|
465
|
+
};
|
|
466
|
+
}
|
|
467
|
+
/**
|
|
468
|
+
* Compute comparison statistics from merged atoms.
|
|
469
|
+
*/
|
|
470
|
+
function computeStats(mergedAtoms) {
|
|
471
|
+
const reconstructionStats = computeReconstructionStats(mergedAtoms);
|
|
472
|
+
// Count unique paragraphs for modifications
|
|
473
|
+
// A modification is when we have both deleted and inserted atoms in the same paragraph
|
|
474
|
+
const modifiedParagraphs = new Set();
|
|
475
|
+
let currentParagraph = '';
|
|
476
|
+
let hasDeleted = false;
|
|
477
|
+
let hasInserted = false;
|
|
478
|
+
for (const atom of mergedAtoms) {
|
|
479
|
+
// Detect paragraph boundaries
|
|
480
|
+
const pAncestor = atom.ancestorElements.find((a) => a.tagName === 'w:p');
|
|
481
|
+
const paragraphId = pAncestor
|
|
482
|
+
? `${atom.part.uri}:${atom.ancestorElements.indexOf(pAncestor)}`
|
|
483
|
+
: '';
|
|
484
|
+
if (paragraphId !== currentParagraph) {
|
|
485
|
+
// Check previous paragraph
|
|
486
|
+
if (currentParagraph && hasDeleted && hasInserted) {
|
|
487
|
+
modifiedParagraphs.add(currentParagraph);
|
|
488
|
+
}
|
|
489
|
+
currentParagraph = paragraphId;
|
|
490
|
+
hasDeleted = false;
|
|
491
|
+
hasInserted = false;
|
|
492
|
+
}
|
|
493
|
+
if (atom.correlationStatus === CorrelationStatus.Deleted) {
|
|
494
|
+
hasDeleted = true;
|
|
495
|
+
}
|
|
496
|
+
else if (atom.correlationStatus === CorrelationStatus.Inserted) {
|
|
497
|
+
hasInserted = true;
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
// Check last paragraph
|
|
501
|
+
if (currentParagraph && hasDeleted && hasInserted) {
|
|
502
|
+
modifiedParagraphs.add(currentParagraph);
|
|
503
|
+
}
|
|
504
|
+
return {
|
|
505
|
+
insertions: reconstructionStats.insertions,
|
|
506
|
+
deletions: reconstructionStats.deletions,
|
|
507
|
+
modifications: modifiedParagraphs.size + reconstructionStats.formatChanges,
|
|
508
|
+
};
|
|
509
|
+
}
|
|
510
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/baselines/atomizer/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,kCAAkC,CAAC;AAyB/D,OAAO,EACL,+BAA+B,EAC/B,iCAAiC,EACjC,iBAAiB,GAClB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,WAAW,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EAAE,6BAA6B,EAAE,MAAM,2BAA2B,CAAC;AAC1E,OAAO,EACL,gBAAgB,EAChB,QAAQ,EACR,wBAAwB,GACzB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAC1E,OAAO,EACL,oBAAoB,EACpB,6BAA6B,GAC9B,MAAM,cAAc,CAAC;AACtB,OAAO,EACL,mBAAmB,EACnB,iCAAiC,GAClC,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACL,mBAAmB,EACnB,0BAA0B,GAC3B,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EACL,gBAAgB,EAChB,gBAAgB,EAChB,yBAAyB,EACzB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,OAAO,EACL,yBAAyB,EAEzB,yBAAyB,GAC1B,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AAgDzD,SAAS,WAAW,CAAC,CAAW,EAAE,CAAW;IAC3C,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;IAClC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,8BAA8B,CAAC,IAAyC;IAC/E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,QAAQ,GAAG,iCAAiC,CAAC;IAEnD,KAAK,MAAM,IAAI,IAAI,gBAAgB,CAAC,IAAI,EAAE,aAAa,CAAC,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACtC,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI;gBAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AACjC,CAAC;AAED,SAAS,0BAA0B,CAAC,WAAmB;IACrD,MAAM,IAAI,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACvC,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC5C,MAAM,eAA
e,GAAG,IAAI,GAAG,EAAU,CAAC;IAC1C,MAAM,qBAAqB,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhD,KAAK,MAAM,IAAI,IAAI,gBAAgB,CAAC,IAAI,EAAE,iBAAiB,CAAC,EAAE,CAAC;QAC7D,MAAM,EAAE,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACrC,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,IAAI,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,iBAAiB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;;YAC3C,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QACzC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,qBAAqB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;;gBACvD,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,gBAAgB,CAAC,IAAI,EAAE,eAAe,CAAC,EAAE,CAAC;QAC3D,MAAM,EAAE,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACrC,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,eAAe,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;;YACvC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACtB,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IACzC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IACnD,MAAM,uBAAuB,GAAG,8BAA8B,CAAC,IAAI,CAAC,CAAC;IACrE,MAAM,wBAAwB,GAAG,uBAAuB;SACrD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;SACzC,IAAI,EAAE,CAAC;IACV,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACnE,MAAM,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAEjE,OAAO;QACL,QAAQ;QACR,MAAM;QACN,UAAU;QACV,mBAAmB,EAAE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,IAAI,EAAE;QAC7D,uBAAuB;QACvB,wBAAwB;QACxB,iBAAiB,EAAE,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,EAAE;QACvD,eAAe,EAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,IAAI,EAAE;QACnD,iBAAiB;QACjB,eAAe;KAChB,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,SAAS,oCAAoC,CAC3C,QAA6B,EAC7B,MAA2B;IAE3B,OAAO,CACL,WAAW,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC;QACnD,WAAW,CAAC,QAAQ,CAAC,mBAAmB,EAAE,MAAM,CAAC,mBAAmB,CAAC;QACrE,WAAW,CAAC,QAAQ,CAAC,uBAAu
B,EAAE,MAAM,CAAC,uBAAuB,CAAC;QAC7E,WAAW,CAAC,QAAQ,CAAC,wBAAwB,EAAE,MAAM,CAAC,wBAAwB,CAAC;QAC/E,WAAW,CAAC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,CAAC;QACjE,WAAW,CAAC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC,eAAe,CAAC;QAC7D,WAAW,CAAC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,CAAC;QACjE,WAAW,CAAC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC,eAAe,CAAC,CAC9D,CAAC;AACJ,CAAC;AAED,SAAS,OAAO,CAAC,QAAkB,EAAE,MAAgB;IACnD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5D,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAC/D,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AACjC,CAAC;AAED,SAAS,wBAAwB,CAAC,YAAoB,EAAE,UAAkB;IACxE,MAAM,UAAU,GAAG,YAAY,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAC1D,MAAM,aAAa,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IAElE,IAAI,4BAA4B,GAAG,CAAC,CAAC,CAAC;IACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YACxD,4BAA4B,GAAG,CAAC,CAAC;YACjC,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO;QACL,cAAc,EAAE,UAAU,CAAC,cAAc;QACzC,YAAY,EAAE,UAAU,CAAC,YAAY;QACrC,4BAA4B;QAC5B,iBAAiB,EACf,4BAA4B,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,4BAA4B,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE;QAC9F,eAAe,EACb,4BAA4B,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,4BAA4B,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE;QAC5F,gBAAgB,EAAE,UAAU,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;KACrD,CAAC;AACJ,CAAC;AAED,SAAS,4BAA4B,CACnC,QAA6B,EAC7B,MAA2B;IAE3B,OAAO;QACL,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC;QAC3D,uBAAuB,EAAE,OAAO,CAAC,QAAQ,CAAC,uBAAuB,EAAE,MAAM,CAAC,uBAAuB,CAAC;QAClG,wBAAwB,EAAE,OAAO,CAAC,QAAQ,CAAC,wBAAwB,EAAE,MAAM,CAAC,wBAAwB,CAAC;QACrG,QAAQ,EAAE,O
AAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,CAAC;QACrD,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;QAC/C,2BAA2B,EAAE,QAAQ,CAAC,mBAAmB;QACzD,yBAAyB,EAAE,MAAM,CAAC,mBAAmB;QACrD,yBAAyB,EAAE,QAAQ,CAAC,iBAAiB;QACrD,uBAAuB,EAAE,MAAM,CAAC,iBAAiB;QACjD,uBAAuB,EAAE,QAAQ,CAAC,eAAe;QACjD,qBAAqB,EAAE,MAAM,CAAC,eAAe;QAC7C,yBAAyB,EAAE,QAAQ,CAAC,iBAAiB;QACrD,uBAAuB,EAAE,MAAM,CAAC,iBAAiB;QACjD,uBAAuB,EAAE,QAAQ,CAAC,eAAe;QACjD,qBAAqB,EAAE,MAAM,CAAC,eAAe;KAC9C,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,KAA4B;IACpD,OAAO;QACL,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,MAAM;QAClC,eAAe,EAAE,KAAK,CAAC,UAAU,CAAC,MAAM;QACxC,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9B,eAAe,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC;KACrC,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAa,EAAE,SAAS,GAAG,GAAG;IACxD,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,CAAC;AAC3C,CAAC;AAED,SAAS,qBAAqB,CAC5B,OAA0C;IAE1C,OAAO;QACL,4BAA4B,EAAE,OAAO,CAAC,4BAA4B;QAClE,iBAAiB,EAAE,kBAAkB,CAAC,OAAO,CAAC,iBAAiB,CAAC;QAChE,eAAe,EAAE,kBAAkB,CAAC,OAAO,CAAC,eAAe,CAAC;QAC5D,eAAe,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,gBAAgB;KACjE,CAAC;AACJ,CAAC;AAED,SAAS,yBAAyB,CAChC,OAA8C;IAE9C,OAAO;QACL,UAAU,EAAE,gBAAgB,CAAC,OAAO,CAAC,UAAU,CAAC;QAChD,uBAAuB,EAAE,gBAAgB,CAAC,OAAO,CAAC,uBAAuB,CAAC;QAC1E,wBAAwB,EAAE,gBAAgB,CAAC,OAAO,CAAC,wBAAwB,CAAC;QAC5E,QAAQ,EAAE,gBAAgB,CAAC,OAAO,CAAC,QAAQ,CAAC;QAC5C,MAAM,EAAE,gBAAgB,CAAC,OAAO,CAAC,MAAM,CAAC;QACxC,mBAAmB,EAAE,OAAO,CAAC,uBAAuB,CAAC,MAAM;QAC3D,iBAAiB,EAAE,OAAO,CAAC,qBAAqB,CAAC,MAAM;QACvD,qBAAqB,EAAE,OAAO,CAAC,uBAAuB,CAAC,CAAC,CAAC;QACzD,mBAAmB,EAAE,OAAO,CAAC,qBAAqB,CAAC,CAAC,CAAC;KACtD,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAC1B,cAA8D;IAE9D,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,OAAO,GAAuC,EAAE,CAAC;IACvD,IAAI,cAAc,CAAC,UAAU,EAAE,CAAC;QAC9B,OAAO,CAAC,UAAU,GAAG,qBAAqB,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;IACxE,CAAC;IACD,IAAI,cAAc,CAAC,UAAU,EAAE,CAAC;QAC9B,OAAO,CAAC,UAAU,GAAG,qBAAqB,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;IACxE
,CAAC;IACD,IAAI,cAAc,CAAC,eAAe,EAAE,CAAC;QACnC,OAAO,CAAC,eAAe,GAAG,yBAAyB,CAAC,cAAc,CAAC,eAAe,CAAC,CAAC;IACtF,CAAC;IACD,IAAI,cAAc,CAAC,eAAe,EAAE,CAAC;QACnC,OAAO,CAAC,eAAe,GAAG,yBAAyB,CAAC,cAAc,CAAC,eAAe,CAAC,CAAC;IACtF,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;AAC/D,CAAC;AAED,SAAS,oBAAoB,CAC3B,wBAAgC,EAChC,uBAA+B,EAC/B,2BAAgD,EAChD,0BAA+C,EAC/C,YAAoB;IAQpB,MAAM,WAAW,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,WAAW,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,YAAY,GAAG,yBAAyB,CAAC,WAAW,CAAC,CAAC;IAC5D,MAAM,YAAY,GAAG,yBAAyB,CAAC,WAAW,CAAC,CAAC;IAC5D,MAAM,2BAA2B,GAAG,0BAA0B,CAAC,WAAW,CAAC,CAAC;IAC5E,MAAM,2BAA2B,GAAG,0BAA0B,CAAC,WAAW,CAAC,CAAC;IAC5E,MAAM,oBAAoB,GAAG,YAAY,CAAC,uBAAuB,EAAE,YAAY,CAAC,CAAC;IACjF,MAAM,oBAAoB,GAAG,YAAY,CAAC,wBAAwB,EAAE,YAAY,CAAC,CAAC;IAElF,MAAM,MAAM,GAA+B;QACzC,UAAU,EAAE,oBAAoB,CAAC,mBAAmB;QACpD,UAAU,EAAE,oBAAoB,CAAC,mBAAmB;QACpD,eAAe,EAAE,oCAAoC,CACnD,0BAA0B,EAC1B,2BAA2B,CAC5B;QACD,eAAe,EAAE,oCAAoC,CACnD,2BAA2B,EAC3B,2BAA2B,CAC5B;KACF,CAAC;IAEF,MAAM,YAAY,GAAqC,MAAM,CAAC,OAAO,CAAC,MAAM,CAE1E;SACC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;IAEzB,MAAM,cAAc,GAAuC,EAAE,CAAC;IAC9D,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;QACvB,cAAc,CAAC,UAAU,GAAG,wBAAwB,CAAC,uBAAuB,EAAE,YAAY,CAAC,CAAC;IAC9F,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;QACvB,cAAc,CAAC,UAAU,GAAG,wBAAwB,CAAC,wBAAwB,EAAE,YAAY,CAAC,CAAC;IAC/F,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;QAC5B,cAAc,CAAC,eAAe,GAAG,4BAA4B,CAC3D,0BAA0B,EAC1B,2BAA2B,CAC5B,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;QAC5B,cAAc,CAAC,eAAe,GAAG,4BAA4B,CAC3D,2BAA2B,EAC3B,2BAA2B,CAC5B,CAAC;IACJ,CAAC;IAED,OAAO;QACL,IAAI,EAAE,YAAY,CAAC,MAAM,KAAK,CAAC;QAC/B,MAAM;QACN,YAAY;QACZ,cAAc,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS;QACpE,cAAc,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,SAAS;KAC1F,CAAC;AAC
J,CAAC;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,QAAgB,EAChB,OAAe,EACf,UAA2B,EAAE;IAE7B,MAAM,EACJ,MAAM,GAAG,YAAY,EACrB,IAAI,GAAG,IAAI,IAAI,EAAE,EACjB,aAAa,GAAG,EAAE,EAClB,eAAe,GAAG,EAAE,EACpB,SAAS,GAAG,EAAE,EACd,YAAY,GAAG,KAAK,EACpB,kBAAkB,GAAG,SAAS,GAC/B,GAAG,OAAO,CAAC;IAEZ,+BAA+B;IAC/B,MAAM,YAAY,GAA0B;QAC1C,GAAG,+BAA+B;QAClC,GAAG,aAAa;KACjB,CAAC;IAEF,MAAM,cAAc,GAA4B;QAC9C,GAAG,iCAAiC;QACpC,GAAG,eAAe;KACnB,CAAC;IAEF,MAAM,iBAAiB,GAAgC;QACrD,GAAG,yBAAyB;QAC5B,GAAG,SAAS;KACb,CAAC;IAEF,6BAA6B;IAC7B,MAAM,eAAe,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,cAAc,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEvD,+BAA+B;IAC/B,MAAM,WAAW,GAAG,MAAM,eAAe,CAAC,cAAc,EAAE,CAAC;IAC3D,MAAM,UAAU,GAAG,MAAM,cAAc,CAAC,cAAc,EAAE,CAAC;IAEzD,qCAAqC;IACrC,MAAM,oBAAoB,GAAG,MAAM,eAAe,CAAC,eAAe,EAAE,IAAI,SAAS,CAAC;IAClF,MAAM,mBAAmB,GAAG,MAAM,cAAc,CAAC,eAAe,EAAE,IAAI,SAAS,CAAC;IAEhF,MAAM,YAAY,GAAY;QAC5B,GAAG,EAAE,mBAAmB;QACxB,WAAW,EAAE,kFAAkF;KAChG,CAAC;IAEF,MAAM,WAAW,GAAY;QAC3B,GAAG,EAAE,mBAAmB;QACxB,WAAW,EAAE,kFAAkF;KAChG,CAAC;IAEF,MAAM,wBAAwB,GAAG,yBAAyB,CAAC,WAAW,CAAC,CAAC;IACxE,MAAM,uBAAuB,GAAG,yBAAyB,CAAC,UAAU,CAAC,CAAC;IACtE,MAAM,2BAA2B,GAAG,0BAA0B,CAAC,WAAW,CAAC,CAAC;IAC5E,MAAM,0BAA0B,GAAG,0BAA0B,CAAC,UAAU,CAAC,CAAC;IAE1E,MAAM,iBAAiB,GAAG,CACxB,cAA6D,EAC7D,UAA8B,EAK9B,EAAE;QACF,sFAAsF;QACtF,MAAM,YAAY,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;QACnD,MAAM,WAAW,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACjD,wBAAwB,CAAC,YAAY,CAAC,CAAC;QACvC,wBAAwB,CAAC,WAAW,CAAC,CAAC;QAEtC,MAAM,YAAY,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;QAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC;QAC1C,IAAI,CAAC,YAAY,IAAI,CAAC,WAAW,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACpE,CAAC;QAED,IAAI,YAAY,EAAE,CAAC;YACjB,oBAAoB,CAAC,YAAY,CAAC,CAAC;YACnC,oBAAoB,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;QAED,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,WAAW,CAAC,YAAY,EAAE,EAAE,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;QAC7F,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,WAAW,CAAC,WAAW,EAAE,EAAE,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC;QAE1F,qEAAqE;QACrE,sBAAsB,CAAC,a
AAa,CAAC,CAAC;QACtC,sBAAsB,CAAC,YAAY,CAAC,CAAC;QAErC,oDAAoD;QACpD,IAAI,iBAAiB,CAAC,OAAO,EAAE,CAAC;YAC9B,yBAAyB,CAAC,aAAa,EAAE,oBAAoB,EAAE,iBAAiB,CAAC,CAAC;YAClF,yBAAyB,CAAC,YAAY,EAAE,mBAAmB,EAAE,iBAAiB,CAAC,CAAC;QAClF,CAAC;QAED,+EAA+E;QAC/E,MAAM,SAAS,GAAG,mBAAmB,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAEnE,4DAA4D;QAC5D,iCAAiC,CAAC,aAAa,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;QAE1E,6BAA6B;QAC7B,IAAI,YAAY,CAAC,WAAW,EAAE,CAAC;YAC7B,4CAA4C;YAC5C,iEAAiE;YACjE,yCAAyC;YACzC,MAAM,QAAQ,GAAG,CAAC,GAAG,aAAa,EAAE,GAAG,YAAY,CAAC,CAAC;YACrD,qBAAqB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAChD,CAAC;QAED,+BAA+B;QAC/B,IAAI,cAAc,CAAC,mBAAmB,EAAE,CAAC;YACvC,gEAAgE;YAChE,6BAA6B,CAAC,YAAY,EAAE,cAAc,CAAC,CAAC;QAC9D,CAAC;QAED,sDAAsD;QACtD,MAAM,WAAW,GAAG,oBAAoB,CAAC,aAAa,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;QAEjF,kFAAkF;QAClF,6BAA6B,CAAC,aAAa,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;QAEnF,mDAAmD;QACnD,IAAI,cAAsB,CAAC;QAC3B,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;YAC7B,kFAAkF;YAClF,cAAc,GAAG,qBAAqB,CACpC,WAAW,EACX,aAAa,EACb,YAAY,EACZ,WAAW,EACX,EAAE,MAAM,EAAE,IAAI,EAAE,CACjB,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,8EAA8E;YAC9E,cAAc,GAAG,mBAAmB,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QACnF,CAAC;QAED,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,UAAU,EAAE,CAAC;IACrD,CAAC,CAAC;IAEF,MAAM,uBAAuB,GAAG,CAAC,YAAoB,EAAE,EAAE,CACvD,oBAAoB,CAClB,wBAAwB,EACxB,uBAAuB,EACvB,2BAA2B,EAC3B,0BAA0B,EAC1B,YAAY,CACb,CAAC;IAEJ,IAAI,gBAIH,CAAC;IACF,IAAI,cAAwD,CAAC;IAC7D,IAAI,mBAAkE,CAAC;IACvE,IAAI,kBAAkB,KAAK,SAAS,EAAE,CAAC;QACrC,qBAAqB;QACrB,oEAAoE;QACpE,kFAAkF;QAClF,8EAA8E;QAC9E,MAAM,aAAa,GAGd;YACH;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,cAAc,EAAE;oBACd,cAAc,EAAE,IAAI;oBACpB,eAAe,EAAE,KAAK;oBACtB,0BAA0B,EAAE,KAAK;oBACjC,kBAAkB,EAAE,IAAI;iBACzB;aACF;YACD;gBACE,IAAI,EAAE,mBAAmB;gBACzB,cAAc,EAAE;oBACd,cAAc,EAAE,IAAI;oBACpB,eAAe,EAAE,KAAK;oBACtB,0BAA0B,EAAE,KAAK;oBACjC,kBAAkB,EAAE,KAAK;iBAC1B;aACF;YACD;gBACE,IAAI,EAAE,8BAA8B;gBACpC,cAAc,EAAE;oBACd,cAAc,EAAE,IAAI;oBACpB,eAAe,EAAE,IAAI;oBACrB,0BAA0B,EAAE,IAAI;oBAChC,kBAAkB,EAAE,IAAI;iBACzB;aACF;YACD;gBACE,IAAI,EAAE,6BAA6B;gBACnC,
cAAc,EAAE;oBACd,cAAc,EAAE,IAAI;oBACpB,eAAe,EAAE,IAAI;oBACrB,0BAA0B,EAAE,IAAI;oBAChC,kBAAkB,EAAE,KAAK;iBAC1B;aACF;SACF,CAAC;QAEF,MAAM,cAAc,GAAuC,EAAE,CAAC;QAC9D,IAAI,QAA6C,CAAC;QAClD,KAAK,MAAM,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,aAAa,EAAE,CAAC;YACrD,MAAM,SAAS,GAAG,iBAAiB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;YAC/D,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;YAEjE,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBAChB,QAAQ,GAAG,SAAS,CAAC;gBACrB,MAAM;YACR,CAAC;YAED,cAAc,CAAC,IAAI,CAAC;gBAClB,IAAI;gBACJ,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,gBAAgB,EAAE,MAAM,CAAC,cAAc;aACxC,CAAC,CAAC;QACL,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,gBAAgB,GAAG,QAAQ,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,gBAAgB,GAAG,iBAAiB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YAC3D,cAAc,GAAG,gCAAgC,CAAC;YAClD,mBAAmB,GAAG;gBACpB,QAAQ,EAAE,cAAc;aACzB,CAAC;QACJ,CAAC;IACH,CAAC;SAAM,CAAC;QACN,gBAAgB,GAAG,iBAAiB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,EAAE,WAAW,EAAE,cAAc,EAAE,GAAG,gBAAgB,CAAC;IAEzD,8DAA8D;IAC9D,wDAAwD;IACxD,MAAM,WAAW,GAAG,gBAAgB,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,CAAC;IACjG,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,KAAK,EAAE,CAAC;IAChD,aAAa,CAAC,cAAc,CAAC,cAAc,CAAC,CAAC;IAE7C,yCAAyC;IACzC,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC;IAChD,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;IAExC,OAAO;QACL,QAAQ,EAAE,YAAY;QACtB,KAAK;QACL,MAAM,EAAE,UAAmB;QAC3B,2BAA2B,EAAE,kBAAkB;QAC/C,sBAAsB,EAAE,gBAAgB,CAAC,UAAU;QACnD,cAAc;QACd,mBAAmB;KACpB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,WAAiC;IACrD,MAAM,mBAAmB,GAAG,0BAA0B,CAAC,WAAW,CAAC,CAAC;IAEpE,4CAA4C;IAC5C,uFAAuF;IACvF,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAU,CAAC;IAE7C,IAAI,gBAAgB,GAAG,EAAE,CAAC;IAC1B,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,8BAA8B;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,KAAK,CAAC,CAAC;QACzE,MAAM,WAAW,GAAG,SAAS;YAC3B,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,SA
AS,CAAC,EAAE;YAChE,CAAC,CAAC,EAAE,CAAC;QAEP,IAAI,WAAW,KAAK,gBAAgB,EAAE,CAAC;YACrC,2BAA2B;YAC3B,IAAI,gBAAgB,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;gBAClD,kBAAkB,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;YAC3C,CAAC;YACD,gBAAgB,GAAG,WAAW,CAAC;YAC/B,UAAU,GAAG,KAAK,CAAC;YACnB,WAAW,GAAG,KAAK,CAAC;QACtB,CAAC;QAED,IAAI,IAAI,CAAC,iBAAiB,KAAK,iBAAiB,CAAC,OAAO,EAAE,CAAC;YACzD,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,IAAI,IAAI,CAAC,iBAAiB,KAAK,iBAAiB,CAAC,QAAQ,EAAE,CAAC;YACjE,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,IAAI,gBAAgB,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;QAClD,kBAAkB,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO;QACL,UAAU,EAAE,mBAAmB,CAAC,UAAU;QAC1C,SAAS,EAAE,mBAAmB,CAAC,SAAS;QACxC,aAAa,EAAE,kBAAkB,CAAC,IAAI,GAAG,mBAAmB,CAAC,aAAa;KAC3E,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-Compare Run Pre-Merge
|
|
3
|
+
*
|
|
4
|
+
* Optional normalization step to merge adjacent <w:r> siblings with identical
|
|
5
|
+
* formatting before atomization.
|
|
6
|
+
*
|
|
7
|
+
* Motivation:
|
|
8
|
+
* - Some documents are heavily fragmented into multiple runs even when the
|
|
9
|
+
* formatting is identical. This can cause overly-granular diffs.
|
|
10
|
+
* - For `reconstructionMode: 'inplace'`, we intentionally disable atom-level
|
|
11
|
+
* cross-run text merging to keep atoms anchored to real runs. Pre-merging runs
|
|
12
|
+
* is a safer way to reduce fragmentation without creating atoms that span
|
|
13
|
+
* multiple runs.
|
|
14
|
+
*
|
|
15
|
+
* This step is intentionally conservative:
|
|
16
|
+
* - Only merges immediately-adjacent <w:r> siblings under the same parent.
|
|
17
|
+
* - Requires identical run attributes and identical <w:rPr> formatting subtree.
|
|
18
|
+
* - Only merges runs that contain a small, "safe" subset of child elements.
|
|
19
|
+
*/
|
|
20
|
+
/**
|
|
21
|
+
* Merge adjacent <w:r> runs with identical formatting throughout the subtree rooted at `root`, modifying that subtree in place.
|
|
22
|
+
*
|
|
23
|
+
* @returns The number of merges performed.
|
|
24
|
+
*/
|
|
25
|
+
export declare function premergeAdjacentRuns(root: Element): number;
|
|
26
|
+
//# sourceMappingURL=premergeRuns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"premergeRuns.d.ts","sourceRoot":"","sources":["../../../src/baselines/atomizer/premergeRuns.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AA2HH;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,CAY1D"}
|