@usejunior/docx-core 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +86 -28
- package/dist/.tsbuildinfo +1 -0
- package/dist/atomizer.d.ts +218 -0
- package/dist/atomizer.d.ts.map +1 -0
- package/dist/atomizer.js +856 -0
- package/dist/atomizer.js.map +1 -0
- package/dist/baselines/atomizer/atomLcs.d.ts +96 -0
- package/dist/baselines/atomizer/atomLcs.d.ts.map +1 -0
- package/dist/baselines/atomizer/atomLcs.js +347 -0
- package/dist/baselines/atomizer/atomLcs.js.map +1 -0
- package/dist/baselines/atomizer/debug.d.ts +41 -0
- package/dist/baselines/atomizer/debug.d.ts.map +1 -0
- package/dist/baselines/atomizer/debug.js +85 -0
- package/dist/baselines/atomizer/debug.js.map +1 -0
- package/dist/baselines/atomizer/documentReconstructor.d.ts +64 -0
- package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -0
- package/dist/baselines/atomizer/documentReconstructor.js +939 -0
- package/dist/baselines/atomizer/documentReconstructor.js.map +1 -0
- package/dist/baselines/atomizer/hierarchicalLcs.d.ts +111 -0
- package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -0
- package/dist/baselines/atomizer/hierarchicalLcs.js +469 -0
- package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier.d.ts +183 -0
- package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier.js +1600 -0
- package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -0
- package/dist/baselines/atomizer/numberingIntegration.d.ts +59 -0
- package/dist/baselines/atomizer/numberingIntegration.d.ts.map +1 -0
- package/dist/baselines/atomizer/numberingIntegration.js +209 -0
- package/dist/baselines/atomizer/numberingIntegration.js.map +1 -0
- package/dist/baselines/atomizer/pipeline.d.ts +65 -0
- package/dist/baselines/atomizer/pipeline.d.ts.map +1 -0
- package/dist/baselines/atomizer/pipeline.js +510 -0
- package/dist/baselines/atomizer/pipeline.js.map +1 -0
- package/dist/baselines/atomizer/premergeRuns.d.ts +26 -0
- package/dist/baselines/atomizer/premergeRuns.d.ts.map +1 -0
- package/dist/baselines/atomizer/premergeRuns.js +150 -0
- package/dist/baselines/atomizer/premergeRuns.js.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.d.ts +63 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.d.ts.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.js +254 -0
- package/dist/baselines/atomizer/trackChangesAcceptor.js.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts +64 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js +586 -0
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -0
- package/dist/baselines/atomizer/xmlToWmlElement.d.ts +65 -0
- package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -0
- package/dist/baselines/atomizer/xmlToWmlElement.js +95 -0
- package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -0
- package/dist/baselines/diffmatch/documentBuilder.d.ts +44 -0
- package/dist/baselines/diffmatch/documentBuilder.d.ts.map +1 -0
- package/dist/baselines/diffmatch/documentBuilder.js +227 -0
- package/dist/baselines/diffmatch/documentBuilder.js.map +1 -0
- package/dist/baselines/diffmatch/paragraphAlignment.d.ts +75 -0
- package/dist/baselines/diffmatch/paragraphAlignment.d.ts.map +1 -0
- package/dist/baselines/diffmatch/paragraphAlignment.js +206 -0
- package/dist/baselines/diffmatch/paragraphAlignment.js.map +1 -0
- package/dist/baselines/diffmatch/pipeline.d.ts +33 -0
- package/dist/baselines/diffmatch/pipeline.d.ts.map +1 -0
- package/dist/baselines/diffmatch/pipeline.js +84 -0
- package/dist/baselines/diffmatch/pipeline.js.map +1 -0
- package/dist/baselines/diffmatch/runDiff.d.ts +53 -0
- package/dist/baselines/diffmatch/runDiff.d.ts.map +1 -0
- package/dist/baselines/diffmatch/runDiff.js +253 -0
- package/dist/baselines/diffmatch/runDiff.js.map +1 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.d.ts +64 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.d.ts.map +1 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.js +178 -0
- package/dist/baselines/diffmatch/trackChangesRenderer.js.map +1 -0
- package/dist/baselines/diffmatch/xmlParser.d.ts +45 -0
- package/dist/baselines/diffmatch/xmlParser.d.ts.map +1 -0
- package/dist/baselines/diffmatch/xmlParser.js +344 -0
- package/dist/baselines/diffmatch/xmlParser.js.map +1 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts +51 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts.map +1 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.js +83 -0
- package/dist/baselines/wmlcomparer/DocxodusWasm.js.map +1 -0
- package/dist/baselines/wmlcomparer/DotnetCli.d.ts +40 -0
- package/dist/baselines/wmlcomparer/DotnetCli.d.ts.map +1 -0
- package/dist/baselines/wmlcomparer/DotnetCli.js +135 -0
- package/dist/baselines/wmlcomparer/DotnetCli.js.map +1 -0
- package/dist/benchmark/metrics.d.ts +72 -0
- package/dist/benchmark/metrics.d.ts.map +1 -0
- package/dist/benchmark/metrics.js +45 -0
- package/dist/benchmark/metrics.js.map +1 -0
- package/dist/benchmark/reporter.d.ts +23 -0
- package/dist/benchmark/reporter.d.ts.map +1 -0
- package/dist/benchmark/reporter.js +147 -0
- package/dist/benchmark/reporter.js.map +1 -0
- package/dist/benchmark/runner.d.ts +30 -0
- package/dist/benchmark/runner.d.ts.map +1 -0
- package/dist/benchmark/runner.js +233 -0
- package/dist/benchmark/runner.js.map +1 -0
- package/dist/cli/compare-two.d.ts +28 -0
- package/dist/cli/compare-two.d.ts.map +1 -0
- package/dist/cli/compare-two.js +110 -0
- package/dist/cli/compare-two.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +21 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core-types.d.ts +296 -0
- package/dist/core-types.d.ts.map +1 -0
- package/dist/core-types.js +122 -0
- package/dist/core-types.js.map +1 -0
- package/dist/footnotes.d.ts +144 -0
- package/dist/footnotes.d.ts.map +1 -0
- package/dist/footnotes.js +291 -0
- package/dist/footnotes.js.map +1 -0
- package/dist/format-detection.d.ts +120 -0
- package/dist/format-detection.d.ts.map +1 -0
- package/dist/format-detection.js +338 -0
- package/dist/format-detection.js.map +1 -0
- package/dist/index.d.ts +177 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +55 -0
- package/dist/index.js.map +1 -0
- package/dist/integration/output-artifacts.d.ts +6 -0
- package/dist/integration/output-artifacts.d.ts.map +1 -0
- package/dist/integration/output-artifacts.js +30 -0
- package/dist/integration/output-artifacts.js.map +1 -0
- package/dist/move-detection.d.ts +211 -0
- package/dist/move-detection.d.ts.map +1 -0
- package/dist/move-detection.js +391 -0
- package/dist/move-detection.js.map +1 -0
- package/dist/numbering.d.ts +136 -0
- package/dist/numbering.d.ts.map +1 -0
- package/dist/numbering.js +446 -0
- package/dist/numbering.js.map +1 -0
- package/dist/primitives/accept_changes.d.ts +30 -0
- package/dist/primitives/accept_changes.d.ts.map +1 -0
- package/dist/primitives/accept_changes.js +241 -0
- package/dist/primitives/accept_changes.js.map +1 -0
- package/dist/primitives/bookmarks.d.ts +12 -0
- package/dist/primitives/bookmarks.d.ts.map +1 -0
- package/dist/primitives/bookmarks.js +248 -0
- package/dist/primitives/bookmarks.js.map +1 -0
- package/dist/primitives/comments.d.ts +88 -0
- package/dist/primitives/comments.d.ts.map +1 -0
- package/dist/primitives/comments.js +703 -0
- package/dist/primitives/comments.js.map +1 -0
- package/dist/primitives/document.d.ts +168 -0
- package/dist/primitives/document.d.ts.map +1 -0
- package/dist/primitives/document.js +532 -0
- package/dist/primitives/document.js.map +1 -0
- package/dist/primitives/document_view.d.ts +93 -0
- package/dist/primitives/document_view.d.ts.map +1 -0
- package/dist/primitives/document_view.js +722 -0
- package/dist/primitives/document_view.js.map +1 -0
- package/dist/primitives/dom-helpers.d.ts +94 -0
- package/dist/primitives/dom-helpers.d.ts.map +1 -0
- package/dist/primitives/dom-helpers.js +219 -0
- package/dist/primitives/dom-helpers.js.map +1 -0
- package/dist/primitives/errors.d.ts +7 -0
- package/dist/primitives/errors.d.ts.map +1 -0
- package/dist/primitives/errors.js +10 -0
- package/dist/primitives/errors.js.map +1 -0
- package/dist/primitives/extract_revisions.d.ts +50 -0
- package/dist/primitives/extract_revisions.d.ts.map +1 -0
- package/dist/primitives/extract_revisions.js +340 -0
- package/dist/primitives/extract_revisions.js.map +1 -0
- package/dist/primitives/footnotes.d.ts +37 -0
- package/dist/primitives/footnotes.d.ts.map +1 -0
- package/dist/primitives/footnotes.js +552 -0
- package/dist/primitives/footnotes.js.map +1 -0
- package/dist/primitives/formatting_tags.d.ts +30 -0
- package/dist/primitives/formatting_tags.d.ts.map +1 -0
- package/dist/primitives/formatting_tags.js +217 -0
- package/dist/primitives/formatting_tags.js.map +1 -0
- package/dist/primitives/index.d.ts +26 -0
- package/dist/primitives/index.d.ts.map +1 -0
- package/dist/primitives/index.js +26 -0
- package/dist/primitives/index.js.map +1 -0
- package/dist/primitives/layout.d.ts +53 -0
- package/dist/primitives/layout.d.ts.map +1 -0
- package/dist/primitives/layout.js +178 -0
- package/dist/primitives/layout.js.map +1 -0
- package/dist/primitives/list_labels.d.ts +19 -0
- package/dist/primitives/list_labels.d.ts.map +1 -0
- package/dist/primitives/list_labels.js +57 -0
- package/dist/primitives/list_labels.js.map +1 -0
- package/dist/primitives/matching.d.ts +17 -0
- package/dist/primitives/matching.d.ts.map +1 -0
- package/dist/primitives/matching.js +144 -0
- package/dist/primitives/matching.js.map +1 -0
- package/dist/primitives/merge_runs.d.ts +23 -0
- package/dist/primitives/merge_runs.d.ts.map +1 -0
- package/dist/primitives/merge_runs.js +195 -0
- package/dist/primitives/merge_runs.js.map +1 -0
- package/dist/primitives/namespaces.d.ts +90 -0
- package/dist/primitives/namespaces.d.ts.map +1 -0
- package/dist/primitives/namespaces.js +107 -0
- package/dist/primitives/namespaces.js.map +1 -0
- package/dist/primitives/numbering.d.ts +27 -0
- package/dist/primitives/numbering.d.ts.map +1 -0
- package/dist/primitives/numbering.js +182 -0
- package/dist/primitives/numbering.js.map +1 -0
- package/dist/primitives/prevent_double_elevation.d.ts +18 -0
- package/dist/primitives/prevent_double_elevation.d.ts.map +1 -0
- package/dist/primitives/prevent_double_elevation.js +190 -0
- package/dist/primitives/prevent_double_elevation.js.map +1 -0
- package/dist/primitives/reject_changes.d.ts +27 -0
- package/dist/primitives/reject_changes.d.ts.map +1 -0
- package/dist/primitives/reject_changes.js +371 -0
- package/dist/primitives/reject_changes.js.map +1 -0
- package/dist/primitives/relationships.d.ts +7 -0
- package/dist/primitives/relationships.d.ts.map +1 -0
- package/dist/primitives/relationships.js +24 -0
- package/dist/primitives/relationships.js.map +1 -0
- package/dist/primitives/semantic_tags.d.ts +32 -0
- package/dist/primitives/semantic_tags.d.ts.map +1 -0
- package/dist/primitives/semantic_tags.js +139 -0
- package/dist/primitives/semantic_tags.js.map +1 -0
- package/dist/primitives/simplify_redlines.d.ts +19 -0
- package/dist/primitives/simplify_redlines.d.ts.map +1 -0
- package/dist/primitives/simplify_redlines.js +94 -0
- package/dist/primitives/simplify_redlines.js.map +1 -0
- package/dist/primitives/styles.d.ts +36 -0
- package/dist/primitives/styles.d.ts.map +1 -0
- package/dist/primitives/styles.js +190 -0
- package/dist/primitives/styles.js.map +1 -0
- package/dist/primitives/text.d.ts +27 -0
- package/dist/primitives/text.d.ts.map +1 -0
- package/dist/primitives/text.js +416 -0
- package/dist/primitives/text.js.map +1 -0
- package/dist/primitives/validate_document.d.ts +24 -0
- package/dist/primitives/validate_document.d.ts.map +1 -0
- package/dist/primitives/validate_document.js +147 -0
- package/dist/primitives/validate_document.js.map +1 -0
- package/dist/primitives/xml.d.ts +5 -0
- package/dist/primitives/xml.d.ts.map +1 -0
- package/dist/primitives/xml.js +19 -0
- package/dist/primitives/xml.js.map +1 -0
- package/dist/primitives/zip.d.ts +25 -0
- package/dist/primitives/zip.d.ts.map +1 -0
- package/dist/primitives/zip.js +78 -0
- package/dist/primitives/zip.js.map +1 -0
- package/dist/shared/docx/DocxArchive.d.ts +94 -0
- package/dist/shared/docx/DocxArchive.d.ts.map +1 -0
- package/dist/shared/docx/DocxArchive.js +169 -0
- package/dist/shared/docx/DocxArchive.js.map +1 -0
- package/dist/shared/ooxml/namespaces.d.ts +149 -0
- package/dist/shared/ooxml/namespaces.d.ts.map +1 -0
- package/dist/shared/ooxml/namespaces.js +224 -0
- package/dist/shared/ooxml/namespaces.js.map +1 -0
- package/dist/shared/ooxml/types.d.ts +136 -0
- package/dist/shared/ooxml/types.d.ts.map +1 -0
- package/dist/shared/ooxml/types.js +7 -0
- package/dist/shared/ooxml/types.js.map +1 -0
- package/package.json +63 -6
|
@@ -0,0 +1,939 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Reconstructor
|
|
3
|
+
*
|
|
4
|
+
* Rebuilds document.xml from marked atoms with track changes.
|
|
5
|
+
* Generates w:ins, w:del, w:moveFrom, w:moveTo elements as appropriate.
|
|
6
|
+
*/
|
|
7
|
+
import { CorrelationStatus } from '../../core-types.js';
|
|
8
|
+
import { getLeafText, childElements } from '../../primitives/index.js';
|
|
9
|
+
import { serializeToXml, cloneElement } from './xmlToWmlElement.js';
|
|
10
|
+
import { EMPTY_PARAGRAPH_TAG } from '../../atomizer.js';
|
|
11
|
+
import { areRunPropertiesEqual } from '../../format-detection.js';
|
|
12
|
+
import { debug } from './debug.js';
|
|
13
|
+
/**
 * Build the bookkeeping object used to hand out revision IDs.
 *
 * @returns A fresh state whose `nextId` counter starts at 1 and whose
 *          `moveRangeIds` map (keyed by move name) is empty.
 */
function createRevisionIdState() {
    const moveRangeIds = new Map();
    return { nextId: 1, moveRangeIds };
}
|
|
22
|
+
/**
 * Hand out the next unused revision ID.
 *
 * @param state - Revision ID state; its `nextId` counter is advanced by one
 * @returns The value `nextId` held before it was advanced
 */
function allocateRevisionId(state) {
    const issued = state.nextId++;
    return issued;
}
|
|
28
|
+
/**
 * Look up the pair of range IDs reserved for a move, reserving them on first use.
 *
 * The first request for a given `moveName` allocates two revision IDs (source
 * first, then destination) and caches them in `state.moveRangeIds`; later
 * requests for the same name return the cached object.
 *
 * @param state - Revision ID state holding the counter and the cache
 * @param moveName - Name identifying the move
 * @returns Object with `sourceRangeId` and `destRangeId`
 */
function getMoveRangeIds(state, moveName) {
    const known = state.moveRangeIds.get(moveName);
    if (known) {
        return known;
    }
    // Allocation order matters: the source range gets the lower ID.
    const sourceRangeId = allocateRevisionId(state);
    const destRangeId = allocateRevisionId(state);
    const reserved = { sourceRangeId, destRangeId };
    state.moveRangeIds.set(moveName, reserved);
    return reserved;
}
|
|
42
|
+
/**
 * Render a date as an ISO 8601 timestamp for OOXML attributes.
 *
 * The three-digit millisecond fraction produced by `toISOString()` is
 * stripped, so the result has whole-second precision and still ends in `Z`.
 *
 * @param date - Date to format
 * @returns Timestamp such as `2024-01-02T03:04:05Z`
 */
function formatDate(date) {
    const isoWithMillis = date.toISOString();
    return isoWithMillis.replace(/\.\d{3}Z$/, 'Z');
}
|
|
48
|
+
/**
 * Reconstruct document.xml from merged atoms with track changes.
 *
 * Steps, in order: group the atoms into paragraphs, consolidate the change
 * runs inside each paragraph, reset the debug counters, build one XML string
 * per paragraph, log the counters, and pass the paragraph strings together
 * with the original XML to `buildDocument`.
 *
 * @param mergedAtoms - Atoms with correlation status set
 * @param originalXml - Original document.xml for structure preservation
 * @param options - Reconstruction options; `author` and `date` are read here
 * @returns New document.xml with track changes (the result of `buildDocument`)
 */
export function reconstructDocument(mergedAtoms, originalXml, options) {
    const author = options.author;
    const timestamp = formatDate(options.date);
    // One shared ID state so every revision in the document gets a distinct ID.
    const revisionIds = createRevisionIdState();
    const paragraphGroups = consolidateAdjacentChanges(groupAtomsByParagraph(mergedAtoms));
    // Counters are module-level, so clear them before this run starts counting.
    resetDebugCounters();
    resetEmptyParagraphCounters();
    debug('reconstructor', `${mergedAtoms.length} atoms -> ${paragraphGroups.length} paragraphs`);
    const paragraphXmls = paragraphGroups.map((group) => buildParagraphXml(group, author, timestamp, revisionIds));
    const counters = getDebugCounters();
    debug('reconstructor', `buildRunContent processed: ${counters.atoms} atoms, ${counters.wt} w:t elements`);
    const emptyCounters = getEmptyParagraphCounters();
    debug('reconstructor', `Empty paragraphs: inserted=${emptyCounters.inserted}, deleted=${emptyCounters.deleted}, equal=${emptyCounters.equal}, other=${emptyCounters.other}`);
    return buildDocument(originalXml, paragraphXmls);
}
|
|
81
|
+
/**
 * Partition atoms into paragraph groups, each holding an ordered list of run groups.
 *
 * The atoms are first copied and sorted by `paragraphIndex` so that atoms of
 * the same paragraph are contiguous; atoms without an index sort last. The
 * sorted atoms are then walked once: a new paragraph group is opened whenever
 * `shouldStartNewParagraph` says so, and a new run group whenever
 * `shouldStartNewRunGroup` says so.
 *
 * @param atoms - Atoms with correlation status set; the input array is not reordered
 * @returns Paragraph groups of shape `{ pPr, runGroups }`, where each run group
 *          is `{ status, atoms, rPr, moveName }`
 */
function groupAtomsByParagraph(atoms) {
    const distinctIndexCount = new Set(atoms.map((atom) => atom.paragraphIndex)).size;
    debug('reconstructor', `groupAtomsByParagraph: ${atoms.length} atoms, ${distinctIndexCount} unique paragraphIndices`);
    // Array.prototype.sort is stable, so atoms sharing an index keep their
    // incoming relative order (deleted before inserted).
    const sortKey = (atom) => atom.paragraphIndex ?? Number.MAX_SAFE_INTEGER;
    const ordered = [...atoms].sort((left, right) => sortKey(left) - sortKey(right));
    const paragraphs = [];
    let paragraph = null;
    let run = null;
    ordered.forEach((atom) => {
        const paragraphEl = findAncestorByTag(atom, 'w:p');
        const runEl = findAncestorByTag(atom, 'w:r');
        const paragraphProps = paragraphEl ? findChildByTag(paragraphEl, 'w:pPr') : null;
        // The open run is passed along because it has not been pushed onto
        // the paragraph yet; the comparison is by paragraphIndex, not identity.
        const opensParagraph = !paragraph || shouldStartNewParagraph(paragraph, run, atom);
        if (opensParagraph) {
            if (run && paragraph) {
                paragraph.runGroups.push(run);
            }
            run = null;
            paragraph = {
                pPr: paragraphProps ? cloneElement(paragraphProps) : null,
                runGroups: [],
            };
            paragraphs.push(paragraph);
        }
        // Prefer the atom's own rPr field; fall back to the rPr child of the
        // enclosing w:r for atoms that do not carry one.
        const runProps = getEffectiveAtomRPr(atom) ?? (runEl ? findChildByTag(runEl, 'w:rPr') : null);
        if (run && !shouldStartNewRunGroup(run, atom)) {
            run.atoms.push(atom);
            return;
        }
        if (run) {
            paragraph.runGroups.push(run);
        }
        run = {
            status: atom.correlationStatus,
            atoms: [atom],
            rPr: runProps ? cloneElement(runProps) : null,
            moveName: atom.moveName,
        };
    });
    // Flush the run that was still open when the atoms ran out.
    if (run && paragraph) {
        paragraph.runGroups.push(run);
    }
    return paragraphs;
}
|
|
145
|
+
/**
 * Tell whether every atom in a run group has blank text.
 *
 * An atom counts as blank when `getLeafText` of its content element is
 * null/undefined or trims to the empty string. A group with no atoms is
 * reported as whitespace-only.
 *
 * @param group - Run group whose `atoms` are inspected
 * @returns True when no atom carries non-whitespace text
 */
function isWhitespaceOnlyGroup(group) {
    const hasVisibleText = (atom) => {
        const leafText = getLeafText(atom.contentElement) ?? '';
        return leafText.trim() !== '';
    };
    return !group.atoms.some(hasVisibleText);
}
|
|
154
|
+
/**
 * Reorder run groups inside change blocks so deletions precede insertions.
 *
 * A change block starts at a Deleted or Inserted run group and extends over
 * every following Deleted, Inserted, or whitespace-only Equal run group. Each
 * block is replaced by at most two run groups: one Deleted group with all
 * deleted atoms, then one Inserted group with all inserted atoms. Atoms of a
 * whitespace-only Equal group inside the block are shallow-copied into both
 * lists (re-tagged Deleted and Inserted respectively) so the whitespace
 * survives either accept or reject.
 *
 * Each paragraph group's `runGroups` property is reassigned in place.
 *
 * @param groups - Paragraph groups to process
 * @returns The same `groups` array
 */
function reorderChangeBlocks(groups) {
    groups.forEach((paragraph) => {
        const pending = paragraph.runGroups;
        const reordered = [];
        let cursor = 0;
        while (cursor < pending.length) {
            const head = pending[cursor];
            const opensBlock = head.status === CorrelationStatus.Deleted ||
                head.status === CorrelationStatus.Inserted;
            if (!opensBlock) {
                // Unchanged (or moved) content passes through untouched.
                reordered.push(head);
                cursor += 1;
                continue;
            }
            const removed = [];
            const added = [];
            // Consume the block; `break` leaves the cursor on the first run
            // group that does not belong to it.
            for (; cursor < pending.length; cursor += 1) {
                const member = pending[cursor];
                if (member.status === CorrelationStatus.Deleted) {
                    removed.push(...member.atoms);
                }
                else if (member.status === CorrelationStatus.Inserted) {
                    added.push(...member.atoms);
                }
                else if (member.status === CorrelationStatus.Equal && isWhitespaceOnlyGroup(member)) {
                    for (const atom of member.atoms) {
                        removed.push({ ...atom, correlationStatus: CorrelationStatus.Deleted });
                        added.push({ ...atom, correlationStatus: CorrelationStatus.Inserted });
                    }
                }
                else {
                    break;
                }
            }
            // rPr is left null on the merged groups; per the original note,
            // buildRunContent is expected to sub-group the atoms by rPr.
            if (removed.length > 0) {
                reordered.push({ status: CorrelationStatus.Deleted, atoms: removed, rPr: null });
            }
            if (added.length > 0) {
                reordered.push({ status: CorrelationStatus.Inserted, atoms: added, rPr: null });
            }
        }
        paragraph.runGroups = reordered;
    });
    return groups;
}
|
|
235
|
+
/**
 * Consolidate the change runs within each paragraph for readability.
 *
 * Delegates entirely to `reorderChangeBlocks`, which gathers the deletions of
 * a change block ahead of its insertions instead of leaving them interleaved
 * word by word.
 *
 * Instead of:
 *   <del>word1</del><ins>word2</ins> <del>word3</del><ins>word4</ins>
 *
 * the result reads:
 *   <del>word1 word3</del><ins>word2 word4</ins>
 *
 * @param groups - Paragraph groups to consolidate
 * @returns Whatever `reorderChangeBlocks` returns for `groups`
 */
function consolidateAdjacentChanges(groups) {
    const consolidated = reorderChangeBlocks(groups);
    return consolidated;
}
|
|
251
|
+
/**
 * Find an entry in an atom's ancestor chain by tag name.
 *
 * The `ancestorElements` array is scanned from its last entry toward its
 * first, so when several ancestors share the tag the one latest in the array
 * wins (presumably the innermost one; depends on how the chain is built).
 *
 * @param atom - Atom whose `ancestorElements` are searched
 * @param tagName - Tag name to match exactly
 * @returns The matching element, or null when none matches
 */
function findAncestorByTag(atom, tagName) {
    const chain = atom.ancestorElements;
    let position = chain.length;
    while (position-- > 0) {
        const candidate = chain[position];
        if (candidate.tagName === tagName) {
            return candidate;
        }
    }
    return null;
}
|
|
262
|
+
/**
 * Find the first direct child element with a given tag name.
 *
 * Only nodes whose `nodeType` is 1 (element nodes) are considered, so text
 * and other non-element children are skipped.
 *
 * @param element - Parent whose `childNodes` are scanned in order
 * @param tagName - Tag name to match exactly
 * @returns The first matching child, or null when none matches
 */
function findChildByTag(element, tagName) {
    const ELEMENT_NODE = 1;
    const children = element.childNodes;
    let position = 0;
    while (position < children.length) {
        const node = children[position];
        if (node.nodeType === ELEMENT_NODE && node.tagName === tagName) {
            return node;
        }
        position += 1;
    }
    return null;
}
|
|
274
|
+
/**
 * Decide whether the incoming atom belongs to a new paragraph group.
 *
 * Paragraphs are compared by `paragraphIndex` rather than by object identity,
 * because atoms from the original and revised documents come from different
 * trees. The incoming index is compared against the index of the most recent
 * atom: first the last atom of the still-open run group, and if that yields
 * no index, the last atom of the last run group already pushed onto the
 * paragraph. When either side has no index the answer is false (stay in the
 * current paragraph).
 *
 * @param currentGroup - The current paragraph group being built
 * @param currentRunGroup - The current run group (may not be pushed to currentGroup yet)
 * @param currentAtom - The current atom being processed
 * @returns True when both indices are defined and differ
 */
function shouldStartNewParagraph(currentGroup, currentRunGroup, currentAtom) {
    const incomingIndex = currentAtom.paragraphIndex;
    if (incomingIndex === undefined) {
        return false;
    }
    // paragraphIndex of a run group's last atom, or undefined if it has none.
    const trailingIndexOf = (runGroup) => (runGroup && runGroup.atoms.length > 0
        ? runGroup.atoms[runGroup.atoms.length - 1].paragraphIndex
        : undefined);
    let previousIndex = trailingIndexOf(currentRunGroup);
    if (previousIndex === undefined) {
        const pushed = currentGroup.runGroups;
        previousIndex = trailingIndexOf(pushed[pushed.length - 1]);
    }
    return previousIndex !== undefined && incomingIndex !== previousIndex;
}
|
|
315
|
+
/**
 * Read the run properties carried directly on an atom.
 *
 * @param atom - Atom whose first-class `rPr` field is read
 * @returns The atom's `rPr`, or null when the field is null or undefined
 */
function getEffectiveAtomRPr(atom) {
    const { rPr } = atom;
    return rPr === undefined || rPr === null ? null : rPr;
}
|
|
322
|
+
/**
 * Determine if we should start a new run group.
 *
 * A new group is needed when the atom's correlation status or move name
 * differs from the group's, or, for non-move statuses, when its run
 * properties differ from those of the group's most recent atom.
 *
 * @param currentGroup - The run group currently being filled (must hold at least one atom)
 * @param atom - The atom about to be added
 * @returns True when the atom must open a new run group
 */
function shouldStartNewRunGroup(currentGroup, atom) {
    // Different status = new group
    if (currentGroup.status !== atom.correlationStatus) {
        return true;
    }
    // Different move name = new group
    if (currentGroup.moveName !== atom.moveName) {
        return true;
    }
    // Skip rPr splitting for MovedSource/MovedDestination to avoid
    // duplicate move range markers (moveFromRangeStart/End)
    if (currentGroup.status === CorrelationStatus.MovedSource ||
        currentGroup.status === CorrelationStatus.MovedDestination) {
        return false;
    }
    // Different rPr = new group (prevents formatting bleed between runs)
    const currentRPr = getEffectiveAtomRPr(currentGroup.atoms[currentGroup.atoms.length - 1]);
    const newRPr = getEffectiveAtomRPr(atom);
    // Fast path: same reference. getEffectiveAtomRPr normalizes a missing rPr
    // to null, so this also covers the case where both atoms have none.
    if (currentRPr === newRPr) {
        return false;
    }
    return !areRunPropertiesEqual(currentRPr, newRPr);
}
|
|
350
|
+
/**
 * Tell whether a paragraph group is an empty paragraph carrying one status.
 *
 * @param group - The paragraph group to check
 * @param status - The correlation status every atom must have
 * @returns True when the group has at least one run group and every atom in
 *          every run group is an empty-paragraph marker with `status`
 */
function isEmptyParagraphWithStatus(group, status) {
    const isMarker = (atom) => atom.contentElement.tagName === EMPTY_PARAGRAPH_TAG;
    const carriesStatus = (atom) => atom.correlationStatus === status;
    // Per run group: all atoms must be markers, and then all must match the status.
    const everyRunQualifies = group.runGroups.every((runGroup) => runGroup.atoms.every(isMarker) && runGroup.atoms.every(carriesStatus));
    // A group with no run groups at all is not an empty paragraph.
    return everyRunQualifies && group.runGroups.length > 0;
}
|
|
374
|
+
// Module-level tallies of empty paragraphs seen while building paragraph XML,
// bucketed by the correlation status of the paragraph's first atom.
// They feed debug output only: resetEmptyParagraphCounters() zeroes them and
// getEmptyParagraphCounters() reports a snapshot.
let debugEmptyParaInserted = 0; // status was Inserted
let debugEmptyParaDeleted = 0; // status was Deleted
let debugEmptyParaEqual = 0; // status was Equal
let debugEmptyParaOther = 0; // any other (or missing) status
|
|
379
|
+
/**
 * Zero all four empty-paragraph debug counters
 * (inserted, deleted, equal, other).
 */
export function resetEmptyParagraphCounters() {
    [
        debugEmptyParaInserted,
        debugEmptyParaDeleted,
        debugEmptyParaEqual,
        debugEmptyParaOther,
    ] = [0, 0, 0, 0];
}
|
|
388
|
+
/**
 * Report the current empty-paragraph debug counters.
 *
 * @returns A new object `{ inserted, deleted, equal, other }` holding the
 *          counter values at the time of the call
 */
export function getEmptyParagraphCounters() {
    const snapshot = {
        inserted: debugEmptyParaInserted,
        deleted: debugEmptyParaDeleted,
        equal: debugEmptyParaEqual,
        other: debugEmptyParaOther,
    };
    return snapshot;
}
|
|
399
|
+
/**
 * Tell whether a paragraph group consists solely of empty-paragraph markers.
 *
 * @param group - The paragraph group to check
 * @returns True when the group has at least one run group and every atom's
 *          content element is tagged `EMPTY_PARAGRAPH_TAG`, whatever its status
 */
function isEmptyParagraphGroup(group) {
    const isMarker = (atom) => atom.contentElement.tagName === EMPTY_PARAGRAPH_TAG;
    const onlyMarkers = group.runGroups.every((runGroup) => runGroup.atoms.every(isMarker));
    // A group with no run groups at all is not an empty paragraph.
    return onlyMarkers && group.runGroups.length > 0;
}
|
|
411
|
+
/**
 * Tally an empty paragraph by the correlation status of its first atom and
 * log the first few occurrences for investigation. No-op for non-empty groups.
 */
function recordEmptyParagraphDebugStats(group) {
    if (!isEmptyParagraphGroup(group)) {
        return;
    }
    const status = group.runGroups[0]?.atoms[0]?.correlationStatus;
    if (status === CorrelationStatus.Inserted) {
        debugEmptyParaInserted++;
    }
    else if (status === CorrelationStatus.Deleted) {
        debugEmptyParaDeleted++;
    }
    else if (status === CorrelationStatus.Equal) {
        debugEmptyParaEqual++;
    }
    else {
        debugEmptyParaOther++;
    }
    // Only the first few empty paragraphs are logged to keep debug output bounded.
    const debugLimit = 5;
    const totalEmpty = debugEmptyParaInserted + debugEmptyParaDeleted + debugEmptyParaEqual + debugEmptyParaOther;
    if (totalEmpty <= debugLimit) {
        const atoms = group.runGroups.flatMap(rg => rg.atoms);
        const statuses = atoms.map(a => a.correlationStatus).join(', ');
        debug('reconstructor', `Empty paragraph #${totalEmpty}: status=${status}, atomCount=${atoms.length}, atomStatuses=[${statuses}]`);
    }
}
/**
 * Render the w:id / w:author / w:date attribute list shared by revision
 * elements. Every call consumes one fresh revision id from revState.
 */
function nextParagraphRevisionAttributes(author, dateStr, revState) {
    const id = allocateRevisionId(revState);
    return `w:id="${id}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}"`;
}
/**
 * Build XML for a single paragraph with track changes.
 *
 * Three encodings are produced, checked in this order:
 *  1. Whole-paragraph insert/delete (all content atoms share the status):
 *       <w:p>
 *         <w:pPr><w:rPr><w:ins .../></w:rPr></w:pPr>
 *         <w:ins ...><w:r>...</w:r></w:ins>
 *       </w:p>
 *     This is the Word/Aspose shape; it renders in Word and lets Reject All
 *     remove the paragraph entirely instead of leaving a stub <w:p>.
 *  2. Inserted/deleted EMPTY paragraph: only the paragraph-mark marker is
 *     emitted (<w:p><w:pPr><w:rPr><w:ins .../></w:rPr></w:pPr></w:p>).
 *     <w:ins>/<w:del> is not a container for <w:p> in WordprocessingML, so the
 *     paragraph is never wrapped from the outside.
 *  3. Anything else: <w:p>, its pPr, then each run group with its own
 *     run-level track-change wrapper.
 *
 * @param group Paragraph group (`pPr`, `runGroups`).
 * @param {string} author Revision author (XML-escaped here).
 * @param {string} dateStr Revision timestamp, interpolated as-is.
 * @param revState Revision-id allocator state.
 * @returns {string} Serialized <w:p> element.
 */
function buildParagraphXml(group, author, dateStr, revState) {
    // Track empty paragraph statuses for debugging
    recordEmptyParagraphDebugStats(group);
    const revisionKinds = [
        { status: CorrelationStatus.Inserted, tag: 'w:ins' },
        { status: CorrelationStatus.Deleted, tag: 'w:del' },
    ];
    // 1. Whole-paragraph insert/delete. Paragraph-mark id is allocated before the run-wrapper id.
    for (const { status, tag } of revisionKinds) {
        if (!isEntireParagraphWithStatus(group, status)) {
            continue;
        }
        const paraMarker = `<${tag} ${nextParagraphRevisionAttributes(author, dateStr, revState)} />`;
        const runWrapperAttrs = nextParagraphRevisionAttributes(author, dateStr, revState);
        const parts = [
            '<w:p>',
            serializePPrWithParaRevisionMarker(group.pPr, paraMarker),
            `<${tag} ${runWrapperAttrs}>`,
        ];
        for (const runGroup of group.runGroups) {
            const plainRun = buildRunContentAsPlainRun(runGroup);
            // Deleted text must be carried in <w:delText> rather than <w:t>.
            parts.push(tag === 'w:del'
                ? plainRun.replace(/<w:t([^>]*)>([^<]*)<\/w:t>/g, '<w:delText$1>$2</w:delText>')
                : plainRun);
        }
        parts.push(`</${tag}>`, '</w:p>');
        return parts.join('');
    }
    // 2. Inserted/deleted empty paragraph: mark the paragraph mark itself.
    for (const { status, tag } of revisionKinds) {
        if (!isEmptyParagraphWithStatus(group, status)) {
            continue;
        }
        const paraMarker = `<${tag} ${nextParagraphRevisionAttributes(author, dateStr, revState)} />`;
        return `<w:p>${serializePPrWithParaRevisionMarker(group.pPr, paraMarker)}</w:p>`;
    }
    // 3. Mixed paragraph: per-run-group track changes.
    const parts = ['<w:p>'];
    if (group.pPr) {
        parts.push(serializeToXml(group.pPr));
    }
    for (const runGroup of group.runGroups) {
        parts.push(buildRunGroupXml(runGroup, author, dateStr, revState));
    }
    parts.push('</w:p>');
    return parts.join('');
}
|
|
519
|
+
/**
 * Serialize paragraph properties and ensure a paragraph-level revision marker exists.
 *
 * The marker is placed at the start of the paragraph's own <w:rPr>. When the
 * paragraph has no <w:rPr> (or no <w:pPr> at all) one is synthesized.
 *
 * All splicing is done with string slices rather than String.replace with a
 * replacement string, so `$` sequences inside markerXml (e.g. an author name
 * containing "$&") are inserted literally.
 *
 * @param pPr Paragraph-properties element, or null/undefined when absent.
 * @param {string} markerXml Serialized marker, e.g. `<w:ins w:id=".." ... />`.
 * @returns {string} Serialized <w:pPr> containing the marker.
 */
function serializePPrWithParaRevisionMarker(pPr, markerXml) {
    const markerRPr = `<w:rPr>${markerXml}</w:rPr>`;
    // Common case: no paragraph properties. Create minimal pPr/rPr.
    if (!pPr) {
        return `<w:pPr>${markerRPr}</w:pPr>`;
    }
    const xml = serializeToXml(pPr);
    const spliceAt = (start, end, insertion) => xml.slice(0, start) + insertion + xml.slice(end);
    // The FIRST <w:pPr ...> tag is the outer element. Inspecting only that tag
    // avoids mistaking a nested <w:pPr/> inside <w:pPrChange> for the outer one.
    const outerOpen = /<w:pPr\b([^>]*)>/.exec(xml);
    if (outerOpen && outerOpen[1].endsWith('/')) {
        const attrs = outerOpen[1].slice(0, -1);
        return spliceAt(outerOpen.index, outerOpen.index + outerOpen[0].length, `<w:pPr${attrs}>${markerRPr}</w:pPr>`);
    }
    // If there's an rPr, inject the marker at the start of it.
    // `\b` keeps <w:rPrChange> from being mistaken for <w:rPr>.
    const rPrOpen = /<w:rPr\b([^>]*)>/.exec(xml);
    if (rPrOpen) {
        const tagEnd = rPrOpen.index + rPrOpen[0].length;
        if (rPrOpen[1].endsWith('/')) {
            // Self-closing <w:rPr/>: expand it so the marker ends up inside.
            const attrs = rPrOpen[1].slice(0, -1);
            return spliceAt(rPrOpen.index, tagEnd, `<w:rPr${attrs}>${markerXml}</w:rPr>`);
        }
        return spliceAt(tagEnd, tagEnd, markerXml);
    }
    // Otherwise add a new rPr. In the pPr content model rPr precedes sectPr and
    // pPrChange, so insert before whichever of those comes first; failing that,
    // before the LAST </w:pPr> (the outer close tag).
    const trailingChild = /<w:(?:sectPr|pPrChange)\b/.exec(xml);
    const insertAt = trailingChild ? trailingChild.index : xml.lastIndexOf('</w:pPr>');
    if (insertAt === -1) {
        // Nothing recognizable to splice into; return the serialization untouched.
        return xml;
    }
    return spliceAt(insertAt, insertAt, markerRPr);
}
|
|
541
|
+
/**
 * Decide whether a paragraph should be treated as wholly inserted/deleted.
 *
 * EMPTY_PARAGRAPH_TAG markers are skipped. Atoms carrying `status` count
 * toward the match. Atoms with any other status are tolerated only when they
 * are "noise" (whitespace-only <w:t>, <w:tab>, <w:br>, <w:cr>): such atoms can
 * end up Equal through normalization or LCS alignment, and letting them block
 * the whole-paragraph wrap would leave a stub <w:p> behind on Reject All.
 *
 * @param group Paragraph group exposing `runGroups[].atoms[]`.
 * @param status Correlation status to test for.
 * @returns {boolean} true when the paragraph has content, at least one atom
 *   has `status`, and every other content atom is noise.
 */
function isEntireParagraphWithStatus(group, status) {
    const noiseTags = ['w:tab', 'w:br', 'w:cr'];
    let contentSeen = false;
    let targetSeen = false;
    for (const { atoms } of group.runGroups) {
        for (const { contentElement: el, correlationStatus } of atoms) {
            if (el.tagName === EMPTY_PARAGRAPH_TAG) {
                continue;
            }
            contentSeen = true;
            const blankText = el.tagName === 'w:t' && ((getLeafText(el) ?? '').trim() === '');
            const isNoise = blankText || noiseTags.includes(el.tagName);
            if (correlationStatus === status) {
                targetSeen = true;
            }
            else if (!isNoise) {
                // A real content atom with a different status disqualifies the paragraph.
                return false;
            }
        }
    }
    // No content at all is left to the empty-paragraph handlers; noise-only
    // paragraphs (no atom with the target status) must not be wrapped either.
    return contentSeen && targetSeen;
}
|
|
574
|
+
/**
 * Emit <w:r> markup for a run group with no track-change wrapper of its own.
 * Intended for paragraphs that already carry a paragraph-level <w:ins>/<w:del>.
 *
 * A non-null group.rPr yields one run using that rPr; a null group.rPr splits
 * the content atoms by their per-atom rPr (one run per contiguous sub-group)
 * so formatting does not bleed between atoms.
 *
 * @param group Run group exposing `atoms` and `rPr`.
 * @returns {string} Concatenated run XML, or '' when the group holds only
 *   empty-paragraph markers.
 */
function buildRunContentAsPlainRun(group) {
    const contentAtoms = [];
    for (const atom of group.atoms) {
        if (atom.contentElement.tagName !== EMPTY_PARAGRAPH_TAG) {
            contentAtoms.push(atom);
        }
    }
    if (contentAtoms.length === 0) {
        return '';
    }
    if (group.rPr !== null) {
        // Explicit group-level rPr: a single run (buildSingleRun filters markers itself).
        return buildSingleRun(group.atoms, group.rPr);
    }
    const runs = [];
    for (const { atoms, rPr } of subGroupByRPr(contentAtoms)) {
        runs.push(buildSingleRun(atoms, rPr));
    }
    return runs.join('');
}
|
|
592
|
+
/**
 * Serialize a run group, adding the track-change wrapper that matches its status.
 *
 * Groups whose content is empty (e.g. only empty-paragraph markers) produce ''
 * so that no empty track-change wrappers are generated. Equal, Unknown and any
 * unrecognized status return the bare run content.
 *
 * @param group Run group exposing `status`, `atoms`, `rPr` and optionally `moveName`.
 * @param {string} author Revision author.
 * @param {string} dateStr Revision timestamp.
 * @param revState Revision-id allocator state.
 * @returns {string} Run XML, possibly wrapped in a revision element.
 */
function buildRunGroupXml(group, author, dateStr, revState) {
    const runContent = buildRunContent(group);
    if (!runContent) {
        return '';
    }
    const { status } = group;
    if (status === CorrelationStatus.Equal || status === CorrelationStatus.Unknown) {
        return runContent;
    }
    if (status === CorrelationStatus.Inserted) {
        return wrapWithIns(runContent, author, dateStr, revState);
    }
    if (status === CorrelationStatus.Deleted) {
        return wrapWithDel(runContent, author, dateStr, revState);
    }
    if (status === CorrelationStatus.MovedSource) {
        return wrapWithMoveFrom(runContent, author, dateStr, group.moveName || 'move1', revState);
    }
    if (status === CorrelationStatus.MovedDestination) {
        return wrapWithMoveTo(runContent, author, dateStr, group.moveName || 'move1', revState);
    }
    if (status === CorrelationStatus.FormatChanged) {
        // Format changes are rebuilt from the group so the run can carry rPrChange.
        return buildFormatChangeRun(group, author, dateStr, revState);
    }
    return runContent;
}
|
|
621
|
+
// Debug counters: content atoms processed and <w:t> elements serialized
|
|
622
|
+
let debugAtomCounter = 0; // bumped once per content atom visited by buildSingleRun
|
|
623
|
+
let debugWtCounter = 0; // bumped once per <w:t> element handled by serializeAtomElement
|
|
624
|
+
/**
 * Zero the atom and <w:t> debug counters (for testing).
 */
export function resetDebugCounters() {
    debugAtomCounter = debugWtCounter = 0;
}
|
|
631
|
+
/**
 * Snapshot the debug counters (for testing).
 *
 * @returns {{atoms: number, wt: number}} Current atom and <w:t> counts.
 */
export function getDebugCounters() {
    const atoms = debugAtomCounter;
    const wt = debugWtCounter;
    return { atoms, wt };
}
|
|
637
|
+
/**
 * Partition atoms into contiguous runs that share the same effective rPr.
 *
 * Walks the atoms in order; an atom joins the open sub-group when its
 * effective rPr is the same reference (which also covers both being null) or
 * compares equal via areRunPropertiesEqual, otherwise it opens a new one.
 *
 * @param {Array} atoms Atoms in document order.
 * @returns {Array<{rPr: *, atoms: Array}>} Sub-groups in order; each `rPr` is
 *   the effective rPr of the sub-group's first atom. Empty input yields [].
 */
function subGroupByRPr(atoms) {
    const subGroups = [];
    let open = null;
    for (const atom of atoms) {
        const rPr = getEffectiveAtomRPr(atom);
        // Identity check first: cheap, and skips the deep comparison for shared/null rPr.
        const continuesOpenGroup = open !== null
            && (open.rPr === rPr || areRunPropertiesEqual(open.rPr, rPr));
        if (continuesOpenGroup) {
            open.atoms.push(atom);
        }
        else {
            open = { rPr, atoms: [atom] };
            subGroups.push(open);
        }
    }
    return subGroups;
}
|
|
669
|
+
/**
 * Render one <w:r> element from the given atoms using the given rPr.
 *
 * - Empty-paragraph markers are dropped; if nothing is left the result is ''.
 * - Consecutive <w:t> atoms are coalesced into a single <w:t>, flushed whenever
 *   a non-text atom (or a collapsed field) is encountered and at the end.
 * - An atom with collapsedFieldAtoms is expanded into its constituent elements.
 * - debugAtomCounter is incremented once per content atom.
 *
 * @param {Array} atoms Atoms to render (markers allowed; they are filtered).
 * @param rPr Run-properties element to serialize first, or a falsy value for none.
 * @returns {string} Serialized <w:r>, or '' when there is no content.
 */
function buildSingleRun(atoms, rPr) {
    const renderable = atoms.filter(({ contentElement }) => contentElement.tagName !== EMPTY_PARAGRAPH_TAG);
    if (renderable.length === 0) {
        return '';
    }
    const out = ['<w:r>'];
    if (rPr) {
        out.push(serializeToXml(rPr));
    }
    let textBuffer = '';
    // Emit the buffered text (if any) as one <w:t>, preserving space when it contains a space.
    const emitBufferedText = () => {
        if (textBuffer === '') {
            return;
        }
        const spaceAttr = textBuffer.includes(' ') ? ' xml:space="preserve"' : '';
        out.push(`<w:t${spaceAttr}>${escapeXmlText(textBuffer)}</w:t>`);
        textBuffer = '';
    };
    for (const atom of renderable) {
        debugAtomCounter++;
        const fieldAtoms = atom.collapsedFieldAtoms;
        if (fieldAtoms && fieldAtoms.length > 0) {
            emitBufferedText();
            for (const fieldAtom of fieldAtoms) {
                out.push(serializeAtomElement(fieldAtom.contentElement));
            }
        }
        else if (atom.contentElement.tagName === 'w:t') {
            textBuffer += getLeafText(atom.contentElement) ?? '';
        }
        else {
            emitBufferedText();
            out.push(serializeAtomElement(atom.contentElement));
        }
    }
    emitBufferedText();
    out.push('</w:r>');
    return out.join('');
}
|
|
716
|
+
/**
 * Turn a single atom content element into its XML string.
 *
 * - <w:t>: text is escaped and re-emitted, with xml:space="preserve" whenever
 *   it contains a space character (this subsumes the leading/trailing-space
 *   cases); debugWtCounter is incremented.
 * - <w:br>, <w:tab>, <w:cr>: emitted as bare self-closing tags.
 * - Anything else (field chars, instrText, ...): serialized as-is.
 *
 * @param element Content element exposing `tagName`.
 * @returns {string} Serialized XML for the element.
 */
function serializeAtomElement(element) {
    const { tagName } = element;
    if (tagName === 'w:t') {
        debugWtCounter++;
        const text = escapeXmlText(getLeafText(element) ?? '');
        const spaceAttr = text.includes(' ') ? ' xml:space="preserve"' : '';
        return `<w:t${spaceAttr}>${text}</w:t>`;
    }
    if (tagName === 'w:br' || tagName === 'w:tab' || tagName === 'w:cr') {
        return `<${tagName}/>`;
    }
    return serializeToXml(element);
}
|
|
745
|
+
/**
 * Produce the <w:r> markup for a run group's atoms.
 *
 * Yields '' when every atom is an empty-paragraph marker, so no empty <w:r>
 * is ever generated.
 *
 * With a non-null group.rPr a single <w:r> carrying that rPr is emitted. With
 * a null group.rPr (e.g. after reorderChangeBlocks merged atoms from several
 * original RunGroups) the content atoms are sub-grouped by their per-atom rPr
 * and one <w:r> is emitted per sub-group to prevent formatting bleed.
 *
 * @param group Run group exposing `atoms` and `rPr`.
 * @returns {string} Concatenated run XML, or ''.
 */
function buildRunContent(group) {
    const isContent = (atom) => atom.contentElement.tagName !== EMPTY_PARAGRAPH_TAG;
    const contentAtoms = group.atoms.filter(isContent);
    if (contentAtoms.length === 0) {
        return '';
    }
    return group.rPr !== null
        ? buildSingleRun(group.atoms, group.rPr)
        : subGroupByRPr(contentAtoms)
            .map(({ atoms, rPr }) => buildSingleRun(atoms, rPr))
            .join('');
}
|
|
771
|
+
/**
 * Enclose run content in a <w:ins> revision element.
 *
 * @param {string} content Serialized run XML.
 * @param {string} author Revision author (attribute-escaped here).
 * @param {string} dateStr Revision timestamp, interpolated as-is.
 * @param revState Revision-id allocator state; one id is consumed.
 * @returns {string} `<w:ins ...>content</w:ins>`.
 */
function wrapWithIns(content, author, dateStr, revState) {
    const revisionId = allocateRevisionId(revState);
    const openTag = `<w:ins w:id="${revisionId}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`;
    return `${openTag}${content}</w:ins>`;
}
|
|
778
|
+
/**
 * Enclose run content in a <w:del> revision element.
 *
 * Deleted text must be carried in <w:delText>, so every <w:t ...>text</w:t>
 * in the content is renamed (attributes such as xml:space are kept).
 *
 * @param {string} content Serialized run XML.
 * @param {string} author Revision author (attribute-escaped here).
 * @param {string} dateStr Revision timestamp, interpolated as-is.
 * @param revState Revision-id allocator state; one id is consumed.
 * @returns {string} `<w:del ...>converted content</w:del>`.
 */
function wrapWithDel(content, author, dateStr, revState) {
    const revisionId = allocateRevisionId(revState);
    const textToDelText = /<w:t([^>]*)>([^<]*)<\/w:t>/g;
    const deletedRuns = content.replace(textToDelText, '<w:delText$1>$2</w:delText>');
    const openTag = `<w:del w:id="${revisionId}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`;
    return `${openTag}${deletedRuns}</w:del>`;
}
|
|
787
|
+
/**
 * Wrap content with w:moveFrom elements (range start, revision wrapper, range end).
 *
 * The move name is attribute-escaped like the author, so a name containing
 * `"`, `&`, `<` or `>` cannot break the generated markup.
 *
 * NOTE(review): moved-from text is converted to <w:delText> here; confirm this
 * is the intended encoding inside <w:moveFrom> (as opposed to keeping <w:t>).
 *
 * @param {string} content Serialized run XML.
 * @param {string} author Revision author (attribute-escaped here).
 * @param {string} dateStr Revision timestamp, interpolated as-is.
 * @param {string} moveName Name linking this source range to its destination.
 * @param revState Revision state; supplies the shared range ids for moveName
 *   and one fresh revision id.
 * @returns {string} moveFromRangeStart + moveFrom + moveFromRangeEnd markup.
 */
function wrapWithMoveFrom(content, author, dateStr, moveName, revState) {
    const ids = getMoveRangeIds(revState, moveName);
    const moveId = allocateRevisionId(revState);
    const safeAuthor = escapeXmlAttr(author);
    const safeName = escapeXmlAttr(moveName);
    // Convert w:t to w:delText for moved-from content
    const delContent = content.replace(/<w:t([^>]*)>([^<]*)<\/w:t>/g, '<w:delText$1>$2</w:delText>');
    return (`<w:moveFromRangeStart w:id="${ids.sourceRangeId}" w:name="${safeName}" w:author="${safeAuthor}" w:date="${dateStr}"/>` +
        `<w:moveFrom w:id="${moveId}" w:author="${safeAuthor}" w:date="${dateStr}">${delContent}</w:moveFrom>` +
        `<w:moveFromRangeEnd w:id="${ids.sourceRangeId}"/>`);
}
|
|
799
|
+
/**
 * Wrap content with w:moveTo elements (range start, revision wrapper, range end).
 *
 * The move name is attribute-escaped like the author, so a name containing
 * `"`, `&`, `<` or `>` cannot break the generated markup.
 *
 * @param {string} content Serialized run XML (left untouched).
 * @param {string} author Revision author (attribute-escaped here).
 * @param {string} dateStr Revision timestamp, interpolated as-is.
 * @param {string} moveName Name linking this destination range to its source.
 * @param revState Revision state; supplies the shared range ids for moveName
 *   and one fresh revision id.
 * @returns {string} moveToRangeStart + moveTo + moveToRangeEnd markup.
 */
function wrapWithMoveTo(content, author, dateStr, moveName, revState) {
    const ids = getMoveRangeIds(revState, moveName);
    const moveId = allocateRevisionId(revState);
    const safeAuthor = escapeXmlAttr(author);
    const safeName = escapeXmlAttr(moveName);
    return (`<w:moveToRangeStart w:id="${ids.destRangeId}" w:name="${safeName}" w:author="${safeAuthor}" w:date="${dateStr}"/>` +
        `<w:moveTo w:id="${moveId}" w:author="${safeAuthor}" w:date="${dateStr}">${content}</w:moveTo>` +
        `<w:moveToRangeEnd w:id="${ids.destRangeId}"/>`);
}
|
|
809
|
+
/**
 * Build run with format change tracking (w:rPrChange).
 *
 * The run's <w:rPr> holds the current properties (group.rPr, falling back to
 * the first atom's rPr) followed by a <w:rPrChange> carrying the previous
 * properties recorded on the first atom's formatChange. Any rPrChange already
 * present in the current properties is dropped so it is not duplicated.
 *
 * Empty-paragraph marker atoms are skipped when emitting content, matching
 * the other run builders in this file; they are bookkeeping atoms and are not
 * meant to be serialized into a run.
 *
 * @param group Run group exposing `atoms` and `rPr`.
 * @param {string} author Revision author (attribute-escaped here).
 * @param {string} dateStr Revision timestamp, interpolated as-is.
 * @param revState Revision-id allocator state; one id is consumed when old
 *   run properties are available.
 * @returns {string} Serialized <w:r>.
 */
function buildFormatChangeRun(group, author, dateStr, revState) {
    const parts = ['<w:r>'];
    const leadAtom = group.atoms[0];
    const effectiveRPr = group.rPr ?? leadAtom?.rPr ?? null;
    const formatChange = leadAtom?.formatChange;
    // Build rPr with rPrChange
    if (effectiveRPr || formatChange) {
        parts.push('<w:rPr>');
        // Current properties
        if (effectiveRPr) {
            for (const child of childElements(effectiveRPr)) {
                if (child.tagName !== 'w:rPrChange') {
                    parts.push(serializeToXml(child));
                }
            }
        }
        // Add rPrChange with old properties (kept last inside rPr)
        if (formatChange?.oldRunProperties) {
            const id = allocateRevisionId(revState);
            parts.push(`<w:rPrChange w:id="${id}" w:author="${escapeXmlAttr(author)}" w:date="${dateStr}">`);
            for (const child of childElements(formatChange.oldRunProperties)) {
                parts.push(serializeToXml(child));
            }
            parts.push('</w:rPrChange>');
        }
        parts.push('</w:rPr>');
    }
    // Add atom content
    for (const atom of group.atoms) {
        const element = atom.contentElement;
        if (element.tagName === EMPTY_PARAGRAPH_TAG) {
            continue;
        }
        if (element.tagName === 'w:t') {
            const text = escapeXmlText(getLeafText(element) ?? '');
            // Any space in the text requires xml:space="preserve".
            const spaceAttr = text.includes(' ') ? ' xml:space="preserve"' : '';
            parts.push(`<w:t${spaceAttr}>${text}</w:t>`);
        }
        else {
            parts.push(serializeToXml(element));
        }
    }
    parts.push('</w:r>');
    return parts.join('');
}
|
|
858
|
+
/**
 * Assemble the final document XML by swapping in a new body.
 *
 * Everything before <w:body ...> and after </w:body> is kept verbatim, as are
 * the body's own open and close tags; the body's previous content is replaced
 * by the given paragraphs, newline-separated.
 *
 * Note: no sectPr is extracted and re-appended here. Section properties that
 * live inside paragraph pPr elements already travel with the reconstructed
 * paragraphs, and an earlier regex-based "final sectPr" extraction matched
 * sectPr inside pPr and duplicated large amounts of body text.
 *
 * @param {string} originalXml Full document XML containing a w:body element.
 * @param {string[]} paragraphXmls Serialized paragraphs for the new body.
 * @returns {string} Document XML with the body content replaced.
 * @throws {Error} When no w:body element can be located.
 */
function buildDocument(originalXml, paragraphXmls) {
    const found = originalXml.match(/(<w:body[^>]*>)([\s\S]*?)(<\/w:body>)/);
    if (!found) {
        throw new Error('Could not find w:body in document');
    }
    const [wholeBody, openTag, , closeTag] = found;
    const bodyStart = originalXml.indexOf(wholeBody);
    const prefix = originalXml.slice(0, bodyStart);
    const suffix = originalXml.slice(bodyStart + wholeBody.length);
    // No separate sectPr handling: it rides along inside the paragraphs' pPr.
    return [prefix, openTag, '\n', paragraphXmls.join('\n'), '\n', closeTag, suffix].join('');
}
|
|
880
|
+
/**
 * Escape XML text content.
 *
 * Replaces the three characters that are significant in element content with
 * their predefined entity references. `&` is handled first so the ampersands
 * introduced by the later replacements are not escaped a second time.
 *
 * @param {string} text Raw text.
 * @returns {string} Text safe to place between element tags.
 */
function escapeXmlText(text) {
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}
|
|
889
|
+
/**
 * Escape XML attribute value.
 *
 * Intended for values placed inside double-quoted attributes: `&`, `<`, `>`
 * and `"` are replaced by their predefined entity references. `&` is handled
 * first so the ampersands introduced by the later replacements are not
 * escaped a second time.
 *
 * @param {string} text Raw attribute value.
 * @returns {string} Value safe to place inside a double-quoted attribute.
 */
function escapeXmlAttr(text) {
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}
|
|
899
|
+
/**
 * Summarize a list of merged atoms.
 *
 * @param {Iterable} mergedAtoms Atoms carrying a `correlationStatus`.
 * @returns {{paragraphs: number, insertions: number, deletions: number, moves: number, formatChanges: number}}
 *   `paragraphs` is the number of distinct w:p ancestors found; `insertions`,
 *   `deletions` and `formatChanges` are per-atom counts; `moves` is half the
 *   number of moved atoms (rounded down), because the source and destination
 *   atoms of a move are both present in the list.
 */
export function computeReconstructionStats(mergedAtoms) {
    const seenParagraphs = new Set();
    const tally = { insertions: 0, deletions: 0, moveEndpoints: 0, formatChanges: 0 };
    for (const atom of mergedAtoms) {
        const owner = findAncestorByTag(atom, 'w:p');
        if (owner) {
            seenParagraphs.add(owner);
        }
        const status = atom.correlationStatus;
        if (status === CorrelationStatus.Inserted) {
            tally.insertions += 1;
        }
        else if (status === CorrelationStatus.Deleted) {
            tally.deletions += 1;
        }
        else if (status === CorrelationStatus.MovedSource || status === CorrelationStatus.MovedDestination) {
            tally.moveEndpoints += 1;
        }
        else if (status === CorrelationStatus.FormatChanged) {
            tally.formatChanges += 1;
        }
    }
    return {
        paragraphs: seenParagraphs.size,
        insertions: tally.insertions,
        deletions: tally.deletions,
        moves: Math.floor(tally.moveEndpoints / 2), // each move contributes a source and a destination
        formatChanges: tally.formatChanges,
    };
}
|
|
939
|
+
//# sourceMappingURL=documentReconstructor.js.map
|