@usejunior/docx-core 0.8.2 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/atomizer.d.ts +35 -0
- package/dist/atomizer.d.ts.map +1 -1
- package/dist/atomizer.js +66 -4
- package/dist/atomizer.js.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.js +77 -7
- package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
- package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -1
- package/dist/baselines/atomizer/hierarchicalLcs.js +55 -69
- package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -1
- package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
- package/dist/baselines/atomizer/inPlaceModifier.js +3 -2
- package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
- package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
- package/dist/baselines/atomizer/pipeline.js +249 -88
- package/dist/baselines/atomizer/pipeline.js.map +1 -1
- package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -1
- package/dist/baselines/atomizer/xmlToWmlElement.js +3 -2
- package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -1
- package/dist/format-detection.d.ts.map +1 -1
- package/dist/format-detection.js +2 -1
- package/dist/format-detection.js.map +1 -1
- package/dist/integration/synthetic-docx-fixture.d.ts +62 -0
- package/dist/integration/synthetic-docx-fixture.d.ts.map +1 -0
- package/dist/integration/synthetic-docx-fixture.js +171 -0
- package/dist/integration/synthetic-docx-fixture.js.map +1 -0
- package/dist/move-detection.d.ts.map +1 -1
- package/dist/move-detection.js +3 -4
- package/dist/move-detection.js.map +1 -1
- package/dist/primitives/dom-helpers.d.ts +1 -5
- package/dist/primitives/dom-helpers.d.ts.map +1 -1
- package/dist/primitives/dom-helpers.js +12 -25
- package/dist/primitives/dom-helpers.js.map +1 -1
- package/dist/primitives/layout.d.ts.map +1 -1
- package/dist/primitives/layout.js +4 -6
- package/dist/primitives/layout.js.map +1 -1
- package/dist/primitives/xml.d.ts.map +1 -1
- package/dist/primitives/xml.js +2 -0
- package/dist/primitives/xml.js.map +1 -1
- package/package.json +3 -3
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
* Integrates atomization, LCS comparison, move detection, format detection,
|
|
6
6
|
* and document reconstruction.
|
|
7
7
|
*/
|
|
8
|
-
import {
|
|
8
|
+
import { XMLSerializer } from '@xmldom/xmldom';
|
|
9
|
+
import { parseXml } from '../../primitives/xml.js';
|
|
9
10
|
import { DocxArchive } from '../../shared/docx/DocxArchive.js';
|
|
10
11
|
import { DEFAULT_MOVE_DETECTION_SETTINGS, DEFAULT_FORMAT_DETECTION_SETTINGS, CorrelationStatus, } from '../../core-types.js';
|
|
11
12
|
import { atomizeTree, assignParagraphIndices } from '../../atomizer.js';
|
|
@@ -529,7 +530,7 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
|
|
|
529
530
|
comparisonResult = selected;
|
|
530
531
|
}
|
|
531
532
|
else {
|
|
532
|
-
comparisonResult = runComparisonPass(
|
|
533
|
+
comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
|
|
533
534
|
fallbackReason = 'round_trip_safety_check_failed';
|
|
534
535
|
fallbackDiagnostics = {
|
|
535
536
|
attempts: failedAttempts,
|
|
@@ -537,30 +538,33 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
|
|
|
537
538
|
}
|
|
538
539
|
}
|
|
539
540
|
else {
|
|
540
|
-
comparisonResult = runComparisonPass(
|
|
541
|
+
comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
|
|
541
542
|
}
|
|
542
543
|
const { mergedAtoms, newDocumentXml } = comparisonResult;
|
|
543
544
|
// Step 12: Clone appropriate archive and update document.xml.
|
|
544
545
|
// Use the revised archive only for true inplace output.
|
|
545
546
|
const baseArchive = comparisonResult.outputMode === 'inplace' ? revisedArchive : originalArchive;
|
|
547
|
+
// The merge source is the *opposite* archive from the base: inplace pulls
|
|
548
|
+
// deleted-but-still-referenced definitions from the original, rebuild pulls
|
|
549
|
+
// added-but-still-referenced definitions from the revised. Without this,
|
|
550
|
+
// rebuild output ships dangling references when the original lacks an
|
|
551
|
+
// auxiliary part that the revised side introduced (issue #94).
|
|
552
|
+
const mergeSourceArchive = comparisonResult.outputMode === 'inplace' ? originalArchive : revisedArchive;
|
|
546
553
|
const resultArchive = await baseArchive.clone();
|
|
547
554
|
resultArchive.setDocumentXml(newDocumentXml);
|
|
548
|
-
// Step 12b:
|
|
549
|
-
//
|
|
550
|
-
//
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
if (mergeResults.has('comment')) {
|
|
562
|
-
await mergeCommentAncillaryParts(originalArchive, resultArchive, mergeResults.get('comment'));
|
|
563
|
-
}
|
|
555
|
+
// Step 12b: Merge auxiliary part definitions (footnotes, endnotes, comments).
|
|
556
|
+
// Reconstruction may insert content (deleted in inplace, added in rebuild)
|
|
557
|
+
// whose definitions are missing from the base archive.
|
|
558
|
+
for (const descriptor of AUXILIARY_PARTS) {
|
|
559
|
+
await mergeAuxiliaryPartDefinitions(mergeSourceArchive, resultArchive, newDocumentXml, descriptor);
|
|
560
|
+
}
|
|
561
|
+
// Comment-specific post-pass: walk reply threads via commentsExtended.xml.
|
|
562
|
+
// Gated on root comment IDs in the *result* document (not on what the
|
|
563
|
+
// generic merge appended), so the pass runs even when the original already
|
|
564
|
+
// contains the root and revised only adds replies under it (issue #108).
|
|
565
|
+
const rootCommentIds = collectReferenceIds(newDocumentXml, 'w:commentReference');
|
|
566
|
+
if (rootCommentIds.size > 0) {
|
|
567
|
+
await mergeCommentAncillaryParts(mergeSourceArchive, resultArchive, rootCommentIds);
|
|
564
568
|
}
|
|
565
569
|
// Step 13: Save result and compute stats
|
|
566
570
|
const resultBuffer = await resultArchive.save();
|
|
@@ -609,7 +613,7 @@ const AUXILIARY_PARTS = [
|
|
|
609
613
|
*/
|
|
610
614
|
function collectReferenceIds(documentXml, referenceTag) {
|
|
611
615
|
const ids = new Set();
|
|
612
|
-
const doc =
|
|
616
|
+
const doc = parseXml(documentXml);
|
|
613
617
|
const refs = doc.getElementsByTagName(referenceTag);
|
|
614
618
|
for (let i = 0; i < refs.length; i++) {
|
|
615
619
|
const id = refs[i].getAttribute('w:id');
|
|
@@ -622,7 +626,7 @@ function collectReferenceIds(documentXml, referenceTag) {
|
|
|
622
626
|
* Parse an auxiliary part and extract entry elements by ID.
|
|
623
627
|
*/
|
|
624
628
|
function parseEntries(xml, entryTag) {
|
|
625
|
-
const doc =
|
|
629
|
+
const doc = parseXml(xml);
|
|
626
630
|
const entries = new Map();
|
|
627
631
|
const elements = doc.getElementsByTagName(entryTag);
|
|
628
632
|
for (let i = 0; i < elements.length; i++) {
|
|
@@ -636,25 +640,27 @@ function parseEntries(xml, entryTag) {
|
|
|
636
640
|
const serializer = new XMLSerializer();
|
|
637
641
|
/**
|
|
638
642
|
* Merge auxiliary part definitions (footnotes, endnotes, comments) from the
|
|
639
|
-
*
|
|
640
|
-
*
|
|
643
|
+
* source archive into the result archive. The source archive is whichever
|
|
644
|
+
* side reconstruction may have introduced references to: original in inplace
|
|
645
|
+
* mode (deleted-but-referenced definitions), revised in rebuild mode
|
|
646
|
+
* (added-but-referenced definitions).
|
|
641
647
|
*/
|
|
642
|
-
async function mergeAuxiliaryPartDefinitions(
|
|
648
|
+
async function mergeAuxiliaryPartDefinitions(sourceArchive, resultArchive, documentXml, descriptor) {
|
|
643
649
|
const result = { mergedIds: new Set(), createdPart: false };
|
|
644
650
|
const referencedIds = collectReferenceIds(documentXml, descriptor.referenceTag);
|
|
645
651
|
if (referencedIds.size === 0)
|
|
646
652
|
return result;
|
|
647
|
-
const
|
|
648
|
-
if (!
|
|
653
|
+
const sourcePartXml = await sourceArchive.getFile(descriptor.partPath);
|
|
654
|
+
if (!sourcePartXml)
|
|
649
655
|
return result;
|
|
650
656
|
const resultPartXml = await resultArchive.getFile(descriptor.partPath);
|
|
651
|
-
const
|
|
657
|
+
const sourceParsed = parseEntries(sourcePartXml, descriptor.entryTag);
|
|
652
658
|
const resultParsed = resultPartXml ? parseEntries(resultPartXml, descriptor.entryTag) : null;
|
|
653
659
|
// Find missing entries: referenced in document.xml but not in result
|
|
654
660
|
const missingElements = [];
|
|
655
661
|
for (const id of referencedIds) {
|
|
656
|
-
if (!(resultParsed?.entries.has(id)) &&
|
|
657
|
-
missingElements.push(
|
|
662
|
+
if (!(resultParsed?.entries.has(id)) && sourceParsed.entries.has(id)) {
|
|
663
|
+
missingElements.push(sourceParsed.entries.get(id));
|
|
658
664
|
result.mergedIds.add(id);
|
|
659
665
|
}
|
|
660
666
|
}
|
|
@@ -672,27 +678,33 @@ async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, doc
|
|
|
672
678
|
}
|
|
673
679
|
}
|
|
674
680
|
else {
|
|
675
|
-
// Create part from scratch: clone root from
|
|
676
|
-
|
|
681
|
+
// Create part from scratch: clone root from merge source, drop every
|
|
682
|
+
// non-reserved entry, then append the missing referenced ones.
|
|
683
|
+
// Reserved entries are footnote/endnote separators identified by
|
|
684
|
+
// w:type="separator" / w:type="continuationSeparator" — Word expects
|
|
685
|
+
// them to exist and they don't carry user content. Filtering by w:type
|
|
686
|
+
// (not by magic w:id values) keeps this robust across authoring tools.
|
|
687
|
+
const newDoc = parseXml(sourcePartXml);
|
|
677
688
|
const rootEl = newDoc.getElementsByTagName(descriptor.rootTag)[0];
|
|
678
689
|
if (rootEl) {
|
|
679
|
-
// Remove all existing entries — we only want the missing ones
|
|
680
690
|
const existingEntries = rootEl.getElementsByTagName(descriptor.entryTag);
|
|
681
691
|
const toRemove = [];
|
|
682
692
|
for (let i = 0; i < existingEntries.length; i++) {
|
|
683
|
-
|
|
693
|
+
const el = existingEntries[i];
|
|
694
|
+
const type = el.getAttribute('w:type');
|
|
695
|
+
if (type !== 'separator' && type !== 'continuationSeparator') {
|
|
696
|
+
toRemove.push(el);
|
|
697
|
+
}
|
|
684
698
|
}
|
|
685
699
|
for (const el of toRemove) {
|
|
686
700
|
rootEl.removeChild(el);
|
|
687
701
|
}
|
|
688
|
-
// Add back only the missing entries
|
|
689
702
|
for (const el of missingElements) {
|
|
690
703
|
const imported = newDoc.importNode(el, true);
|
|
691
704
|
rootEl.appendChild(imported);
|
|
692
705
|
}
|
|
693
706
|
resultArchive.setFile(descriptor.partPath, serializer.serializeToString(newDoc));
|
|
694
707
|
result.createdPart = true;
|
|
695
|
-
// Bootstrap OPC metadata for the newly created part
|
|
696
708
|
await ensureOpcMetadata(resultArchive, descriptor);
|
|
697
709
|
}
|
|
698
710
|
}
|
|
@@ -711,7 +723,7 @@ async function ensureOpcMetadata(archive, descriptor) {
|
|
|
711
723
|
// 1. Update [Content_Types].xml
|
|
712
724
|
const ctXml = await archive.getFile('[Content_Types].xml');
|
|
713
725
|
if (ctXml) {
|
|
714
|
-
const ctDoc =
|
|
726
|
+
const ctDoc = parseXml(ctXml);
|
|
715
727
|
const typesEl = ctDoc.documentElement;
|
|
716
728
|
const overrides = typesEl.getElementsByTagNameNS(CT_NS, 'Override');
|
|
717
729
|
const partName = `/${descriptor.partPath}`;
|
|
@@ -734,7 +746,7 @@ async function ensureOpcMetadata(archive, descriptor) {
|
|
|
734
746
|
const relsPath = 'word/_rels/document.xml.rels';
|
|
735
747
|
const relsXml = await archive.getFile(relsPath);
|
|
736
748
|
if (relsXml) {
|
|
737
|
-
const relsDoc =
|
|
749
|
+
const relsDoc = parseXml(relsXml);
|
|
738
750
|
const relsEl = relsDoc.documentElement;
|
|
739
751
|
const existingRels = relsEl.getElementsByTagNameNS(REL_NS, 'Relationship');
|
|
740
752
|
let found = false;
|
|
@@ -764,52 +776,154 @@ async function ensureOpcMetadata(archive, descriptor) {
|
|
|
764
776
|
// Comment Ancillary Parts Merging
|
|
765
777
|
// =============================================================================
|
|
766
778
|
/**
|
|
767
|
-
*
|
|
768
|
-
*
|
|
779
|
+
* Walk the comment reply graph from each root referenced in the result
|
|
780
|
+
* document, merging reply <w:comment> entries, their commentsExtended.xml
|
|
781
|
+
* threading entries, and people.xml authors. Replies have no
|
|
782
|
+
* <w:commentReference> in document.xml — they're discoverable only via
|
|
783
|
+
* w15:paraIdParent in commentsExtended.xml. Without this expansion, rebuild
|
|
784
|
+
* mode silently drops reply threads (issue #108).
|
|
769
785
|
*/
|
|
770
|
-
async function mergeCommentAncillaryParts(
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
if (!originalCommentsXml)
|
|
786
|
+
async function mergeCommentAncillaryParts(sourceArchive, resultArchive, rootCommentIds) {
|
|
787
|
+
const sourceCommentsXml = await sourceArchive.getFile('word/comments.xml');
|
|
788
|
+
if (!sourceCommentsXml)
|
|
774
789
|
return;
|
|
775
|
-
const
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
const
|
|
779
|
-
|
|
780
|
-
|
|
790
|
+
const sourceDoc = parseXml(sourceCommentsXml);
|
|
791
|
+
// Build full source comment maps. Canonical paraId is the first <w:p>
|
|
792
|
+
// child's w14:paraId, matching getCommentElParaId() in primitives/comments.ts.
|
|
793
|
+
const commentById = new Map();
|
|
794
|
+
const paraIdByCommentId = new Map();
|
|
795
|
+
const commentIdByParaId = new Map();
|
|
796
|
+
const authorByCommentId = new Map();
|
|
797
|
+
const allCommentEls = sourceDoc.getElementsByTagName('w:comment');
|
|
798
|
+
for (let i = 0; i < allCommentEls.length; i++) {
|
|
799
|
+
const el = allCommentEls[i];
|
|
781
800
|
const id = el.getAttribute('w:id');
|
|
782
|
-
if (!id
|
|
801
|
+
if (!id)
|
|
783
802
|
continue;
|
|
803
|
+
commentById.set(id, el);
|
|
784
804
|
const author = el.getAttribute('w:author');
|
|
785
805
|
if (author)
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
const
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
806
|
+
authorByCommentId.set(id, author);
|
|
807
|
+
const firstP = el.getElementsByTagName('w:p')[0];
|
|
808
|
+
const paraId = firstP?.getAttribute('w14:paraId');
|
|
809
|
+
if (paraId) {
|
|
810
|
+
paraIdByCommentId.set(id, paraId);
|
|
811
|
+
commentIdByParaId.set(paraId, id);
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
// Seed inclusion sets from the root IDs that appear in the result document.
|
|
815
|
+
const includedCommentIds = new Set();
|
|
816
|
+
const includedParaIds = new Set();
|
|
817
|
+
const includedAuthors = new Set();
|
|
818
|
+
for (const id of rootCommentIds) {
|
|
819
|
+
if (!commentById.has(id))
|
|
820
|
+
continue;
|
|
821
|
+
includedCommentIds.add(id);
|
|
822
|
+
const pid = paraIdByCommentId.get(id);
|
|
823
|
+
if (pid)
|
|
824
|
+
includedParaIds.add(pid);
|
|
825
|
+
const author = authorByCommentId.get(id);
|
|
826
|
+
if (author)
|
|
827
|
+
includedAuthors.add(author);
|
|
828
|
+
}
|
|
829
|
+
// BFS over commentsExtended.xml's paraIdParent graph from each included
|
|
830
|
+
// root paraId. Skip entries that don't resolve to a real source comment so
|
|
831
|
+
// we never pull in dangling commentEx/people without a backing definition.
|
|
832
|
+
const sourceExtendedXml = await sourceArchive.getFile('word/commentsExtended.xml');
|
|
833
|
+
if (sourceExtendedXml) {
|
|
834
|
+
const exDoc = parseXml(sourceExtendedXml);
|
|
835
|
+
const exEls = exDoc.getElementsByTagName('w15:commentEx');
|
|
836
|
+
const childrenOf = new Map();
|
|
837
|
+
for (let i = 0; i < exEls.length; i++) {
|
|
838
|
+
const ex = exEls[i];
|
|
839
|
+
const childPid = ex.getAttribute('w15:paraId');
|
|
840
|
+
const parentPid = ex.getAttribute('w15:paraIdParent');
|
|
841
|
+
if (!childPid || !parentPid)
|
|
842
|
+
continue;
|
|
843
|
+
const arr = childrenOf.get(parentPid);
|
|
844
|
+
if (arr)
|
|
845
|
+
arr.push(childPid);
|
|
846
|
+
else
|
|
847
|
+
childrenOf.set(parentPid, [childPid]);
|
|
848
|
+
}
|
|
849
|
+
const queue = [...includedParaIds];
|
|
850
|
+
while (queue.length > 0) {
|
|
851
|
+
const pid = queue.shift();
|
|
852
|
+
const children = childrenOf.get(pid);
|
|
853
|
+
if (!children)
|
|
854
|
+
continue;
|
|
855
|
+
for (const childPid of children) {
|
|
856
|
+
if (includedParaIds.has(childPid))
|
|
857
|
+
continue;
|
|
858
|
+
const childCommentId = commentIdByParaId.get(childPid);
|
|
859
|
+
if (!childCommentId)
|
|
860
|
+
continue;
|
|
861
|
+
includedParaIds.add(childPid);
|
|
862
|
+
includedCommentIds.add(childCommentId);
|
|
863
|
+
const author = authorByCommentId.get(childCommentId);
|
|
864
|
+
if (author)
|
|
865
|
+
includedAuthors.add(author);
|
|
866
|
+
queue.push(childPid);
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
// Append any reply <w:comment> definitions still missing from result.
|
|
871
|
+
// The generic merge already added roots when needed; we add the replies
|
|
872
|
+
// (and any roots not yet present in the result, defensively).
|
|
873
|
+
await mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds);
|
|
874
|
+
// Merge commentsExtended and people for the expanded set.
|
|
875
|
+
await mergeCommentsExtended(sourceArchive, resultArchive, includedParaIds);
|
|
876
|
+
await mergePeople(sourceArchive, resultArchive, includedAuthors);
|
|
800
877
|
}
|
|
801
|
-
|
|
878
|
+
/**
|
|
879
|
+
* Append any source <w:comment> definitions in `includedCommentIds` that
|
|
880
|
+
* aren't already in result/word/comments.xml. Mirrors the append-with-importNode
|
|
881
|
+
* pattern used by mergeCommentsExtended below.
|
|
882
|
+
*/
|
|
883
|
+
async function mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds) {
|
|
884
|
+
if (includedCommentIds.size === 0)
|
|
885
|
+
return;
|
|
886
|
+
const resultXml = await resultArchive.getFile('word/comments.xml');
|
|
887
|
+
if (!resultXml) {
|
|
888
|
+
// If result has no comments.xml at all, the generic merge would have
|
|
889
|
+
// bootstrapped it for any included root. Nothing to do here.
|
|
890
|
+
return;
|
|
891
|
+
}
|
|
892
|
+
const resultDoc = parseXml(resultXml);
|
|
893
|
+
const rootEl = resultDoc.documentElement;
|
|
894
|
+
const existingIds = new Set();
|
|
895
|
+
const existing = rootEl.getElementsByTagName('w:comment');
|
|
896
|
+
for (let i = 0; i < existing.length; i++) {
|
|
897
|
+
const id = existing[i].getAttribute('w:id');
|
|
898
|
+
if (id)
|
|
899
|
+
existingIds.add(id);
|
|
900
|
+
}
|
|
901
|
+
let appended = false;
|
|
902
|
+
for (const id of includedCommentIds) {
|
|
903
|
+
if (existingIds.has(id))
|
|
904
|
+
continue;
|
|
905
|
+
const sourceEl = commentById.get(id);
|
|
906
|
+
if (!sourceEl)
|
|
907
|
+
continue;
|
|
908
|
+
rootEl.appendChild(resultDoc.importNode(sourceEl, true));
|
|
909
|
+
appended = true;
|
|
910
|
+
}
|
|
911
|
+
if (appended) {
|
|
912
|
+
resultArchive.setFile('word/comments.xml', serializer.serializeToString(resultDoc));
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
async function mergeCommentsExtended(sourceArchive, resultArchive, mergedParaIds) {
|
|
802
916
|
if (mergedParaIds.size === 0)
|
|
803
917
|
return;
|
|
804
|
-
const
|
|
805
|
-
if (!
|
|
918
|
+
const sourceXml = await sourceArchive.getFile('word/commentsExtended.xml');
|
|
919
|
+
if (!sourceXml)
|
|
806
920
|
return;
|
|
807
|
-
const
|
|
808
|
-
const
|
|
921
|
+
const sourceDoc = parseXml(sourceXml);
|
|
922
|
+
const sourceEntries = sourceDoc.getElementsByTagName('w15:commentEx');
|
|
809
923
|
// Collect entries whose paraId matches a merged comment's paragraph
|
|
810
924
|
const entriesToMerge = [];
|
|
811
|
-
for (let i = 0; i <
|
|
812
|
-
const el =
|
|
925
|
+
for (let i = 0; i < sourceEntries.length; i++) {
|
|
926
|
+
const el = sourceEntries[i];
|
|
813
927
|
const paraId = el.getAttribute('w15:paraId');
|
|
814
928
|
if (paraId && mergedParaIds.has(paraId)) {
|
|
815
929
|
entriesToMerge.push(el);
|
|
@@ -817,11 +931,10 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
|
|
|
817
931
|
}
|
|
818
932
|
if (entriesToMerge.length === 0)
|
|
819
933
|
return;
|
|
820
|
-
|
|
934
|
+
const resultXml = await resultArchive.getFile('word/commentsExtended.xml');
|
|
821
935
|
if (resultXml) {
|
|
822
|
-
const resultDoc =
|
|
936
|
+
const resultDoc = parseXml(resultXml);
|
|
823
937
|
const rootEl = resultDoc.documentElement;
|
|
824
|
-
// Check existing paraIds to avoid duplicates
|
|
825
938
|
const existingParaIds = new Set();
|
|
826
939
|
const existing = rootEl.getElementsByTagName('w15:commentEx');
|
|
827
940
|
for (let i = 0; i < existing.length; i++) {
|
|
@@ -836,21 +949,55 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
|
|
|
836
949
|
}
|
|
837
950
|
}
|
|
838
951
|
resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(resultDoc));
|
|
952
|
+
return;
|
|
839
953
|
}
|
|
840
|
-
//
|
|
841
|
-
//
|
|
954
|
+
// Bootstrap: result lacks commentsExtended.xml but the merged comments
|
|
955
|
+
// depend on it for reply threading / done state. Clone the source's root
|
|
956
|
+
// (preserves namespaces), drop non-matching entries, then add OPC metadata.
|
|
957
|
+
const newDoc = parseXml(sourceXml);
|
|
958
|
+
const newRoot = newDoc.documentElement;
|
|
959
|
+
const allEntries = newRoot.getElementsByTagName('w15:commentEx');
|
|
960
|
+
const toRemove = [];
|
|
961
|
+
for (let i = 0; i < allEntries.length; i++) {
|
|
962
|
+
const el = allEntries[i];
|
|
963
|
+
const paraId = el.getAttribute('w15:paraId');
|
|
964
|
+
if (!paraId || !mergedParaIds.has(paraId))
|
|
965
|
+
toRemove.push(el);
|
|
966
|
+
}
|
|
967
|
+
for (const el of toRemove)
|
|
968
|
+
newRoot.removeChild(el);
|
|
969
|
+
resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(newDoc));
|
|
970
|
+
await ensureOpcMetadata(resultArchive, COMMENTS_EXTENDED_DESCRIPTOR);
|
|
842
971
|
}
|
|
843
|
-
|
|
972
|
+
const COMMENTS_EXTENDED_DESCRIPTOR = {
|
|
973
|
+
label: 'commentsExtended',
|
|
974
|
+
partPath: 'word/commentsExtended.xml',
|
|
975
|
+
referenceTag: '',
|
|
976
|
+
entryTag: 'w15:commentEx',
|
|
977
|
+
rootTag: 'w15:commentsEx',
|
|
978
|
+
contentType: 'application/vnd.ms-word.commentsExtended+xml',
|
|
979
|
+
relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/commentsExtended',
|
|
980
|
+
};
|
|
981
|
+
const PEOPLE_DESCRIPTOR = {
|
|
982
|
+
label: 'people',
|
|
983
|
+
partPath: 'word/people.xml',
|
|
984
|
+
referenceTag: '',
|
|
985
|
+
entryTag: 'w15:person',
|
|
986
|
+
rootTag: 'w15:people',
|
|
987
|
+
contentType: 'application/vnd.ms-word.people+xml',
|
|
988
|
+
relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/people',
|
|
989
|
+
};
|
|
990
|
+
async function mergePeople(sourceArchive, resultArchive, mergedAuthors) {
|
|
844
991
|
if (mergedAuthors.size === 0)
|
|
845
992
|
return;
|
|
846
|
-
const
|
|
847
|
-
if (!
|
|
993
|
+
const sourceXml = await sourceArchive.getFile('word/people.xml');
|
|
994
|
+
if (!sourceXml)
|
|
848
995
|
return;
|
|
849
|
-
const
|
|
850
|
-
const
|
|
996
|
+
const sourceDoc = parseXml(sourceXml);
|
|
997
|
+
const sourcePersons = sourceDoc.getElementsByTagName('w15:person');
|
|
851
998
|
const personsToMerge = [];
|
|
852
|
-
for (let i = 0; i <
|
|
853
|
-
const el =
|
|
999
|
+
for (let i = 0; i < sourcePersons.length; i++) {
|
|
1000
|
+
const el = sourcePersons[i];
|
|
854
1001
|
const author = el.getAttribute('w15:author');
|
|
855
1002
|
if (author && mergedAuthors.has(author)) {
|
|
856
1003
|
personsToMerge.push(el);
|
|
@@ -858,11 +1005,10 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
|
|
|
858
1005
|
}
|
|
859
1006
|
if (personsToMerge.length === 0)
|
|
860
1007
|
return;
|
|
861
|
-
|
|
1008
|
+
const resultXml = await resultArchive.getFile('word/people.xml');
|
|
862
1009
|
if (resultXml) {
|
|
863
|
-
const resultDoc =
|
|
1010
|
+
const resultDoc = parseXml(resultXml);
|
|
864
1011
|
const rootEl = resultDoc.documentElement;
|
|
865
|
-
// Check existing authors to avoid duplicates
|
|
866
1012
|
const existingAuthors = new Set();
|
|
867
1013
|
const existing = rootEl.getElementsByTagName('w15:person');
|
|
868
1014
|
for (let i = 0; i < existing.length; i++) {
|
|
@@ -877,9 +1023,24 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
|
|
|
877
1023
|
}
|
|
878
1024
|
}
|
|
879
1025
|
resultArchive.setFile('word/people.xml', serializer.serializeToString(resultDoc));
|
|
1026
|
+
return;
|
|
1027
|
+
}
|
|
1028
|
+
// Bootstrap: result lacks people.xml. Clone source root (preserves
|
|
1029
|
+
// namespaces), remove non-matching authors, then add OPC metadata.
|
|
1030
|
+
const newDoc = parseXml(sourceXml);
|
|
1031
|
+
const newRoot = newDoc.documentElement;
|
|
1032
|
+
const allPersons = newRoot.getElementsByTagName('w15:person');
|
|
1033
|
+
const toRemove = [];
|
|
1034
|
+
for (let i = 0; i < allPersons.length; i++) {
|
|
1035
|
+
const el = allPersons[i];
|
|
1036
|
+
const author = el.getAttribute('w15:author');
|
|
1037
|
+
if (!author || !mergedAuthors.has(author))
|
|
1038
|
+
toRemove.push(el);
|
|
880
1039
|
}
|
|
881
|
-
|
|
882
|
-
|
|
1040
|
+
for (const el of toRemove)
|
|
1041
|
+
newRoot.removeChild(el);
|
|
1042
|
+
resultArchive.setFile('word/people.xml', serializer.serializeToString(newDoc));
|
|
1043
|
+
await ensureOpcMetadata(resultArchive, PEOPLE_DESCRIPTOR);
|
|
883
1044
|
}
|
|
884
1045
|
/**
|
|
885
1046
|
* Compute comparison statistics from merged atoms.
|