@usejunior/docx-core 0.8.2 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/atomizer.d.ts +35 -0
  3. package/dist/atomizer.d.ts.map +1 -1
  4. package/dist/atomizer.js +66 -4
  5. package/dist/atomizer.js.map +1 -1
  6. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
  7. package/dist/baselines/atomizer/documentReconstructor.js +77 -7
  8. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
  9. package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -1
  10. package/dist/baselines/atomizer/hierarchicalLcs.js +55 -69
  11. package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -1
  12. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
  13. package/dist/baselines/atomizer/inPlaceModifier.js +3 -2
  14. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
  15. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
  16. package/dist/baselines/atomizer/pipeline.js +249 -88
  17. package/dist/baselines/atomizer/pipeline.js.map +1 -1
  18. package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -1
  19. package/dist/baselines/atomizer/xmlToWmlElement.js +3 -2
  20. package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -1
  21. package/dist/format-detection.d.ts.map +1 -1
  22. package/dist/format-detection.js +2 -1
  23. package/dist/format-detection.js.map +1 -1
  24. package/dist/integration/synthetic-docx-fixture.d.ts +62 -0
  25. package/dist/integration/synthetic-docx-fixture.d.ts.map +1 -0
  26. package/dist/integration/synthetic-docx-fixture.js +171 -0
  27. package/dist/integration/synthetic-docx-fixture.js.map +1 -0
  28. package/dist/move-detection.d.ts.map +1 -1
  29. package/dist/move-detection.js +3 -4
  30. package/dist/move-detection.js.map +1 -1
  31. package/dist/primitives/dom-helpers.d.ts +1 -5
  32. package/dist/primitives/dom-helpers.d.ts.map +1 -1
  33. package/dist/primitives/dom-helpers.js +12 -25
  34. package/dist/primitives/dom-helpers.js.map +1 -1
  35. package/dist/primitives/layout.d.ts.map +1 -1
  36. package/dist/primitives/layout.js +4 -6
  37. package/dist/primitives/layout.js.map +1 -1
  38. package/dist/primitives/xml.d.ts.map +1 -1
  39. package/dist/primitives/xml.js +2 -0
  40. package/dist/primitives/xml.js.map +1 -1
  41. package/package.json +3 -3
@@ -5,7 +5,8 @@
5
5
  * Integrates atomization, LCS comparison, move detection, format detection,
6
6
  * and document reconstruction.
7
7
  */
8
- import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
8
+ import { XMLSerializer } from '@xmldom/xmldom';
9
+ import { parseXml } from '../../primitives/xml.js';
9
10
  import { DocxArchive } from '../../shared/docx/DocxArchive.js';
10
11
  import { DEFAULT_MOVE_DETECTION_SETTINGS, DEFAULT_FORMAT_DETECTION_SETTINGS, CorrelationStatus, } from '../../core-types.js';
11
12
  import { atomizeTree, assignParagraphIndices } from '../../atomizer.js';
@@ -529,7 +530,7 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
529
530
  comparisonResult = selected;
530
531
  }
531
532
  else {
532
- comparisonResult = runComparisonPass(undefined, 'rebuild');
533
+ comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
533
534
  fallbackReason = 'round_trip_safety_check_failed';
534
535
  fallbackDiagnostics = {
535
536
  attempts: failedAttempts,
@@ -537,30 +538,33 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
537
538
  }
538
539
  }
539
540
  else {
540
- comparisonResult = runComparisonPass(undefined, 'rebuild');
541
+ comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
541
542
  }
542
543
  const { mergedAtoms, newDocumentXml } = comparisonResult;
543
544
  // Step 12: Clone appropriate archive and update document.xml.
544
545
  // Use the revised archive only for true inplace output.
545
546
  const baseArchive = comparisonResult.outputMode === 'inplace' ? revisedArchive : originalArchive;
547
+ // The merge source is the *opposite* archive from the base: inplace pulls
548
+ // deleted-but-still-referenced definitions from the original, rebuild pulls
549
+ // added-but-still-referenced definitions from the revised. Without this,
550
+ // rebuild output ships dangling references when the original lacks an
551
+ // auxiliary part that the revised side introduced (issue #94).
552
+ const mergeSourceArchive = comparisonResult.outputMode === 'inplace' ? originalArchive : revisedArchive;
546
553
  const resultArchive = await baseArchive.clone();
547
554
  resultArchive.setDocumentXml(newDocumentXml);
548
- // Step 12b: For inplace mode, merge auxiliary part definitions (footnotes,
549
- // endnotes, comments) from the original document. Inplace reconstruction
550
- // inserts deleted content that may reference definitions not present in the
551
- // revised archive.
552
- if (comparisonResult.outputMode === 'inplace') {
553
- const mergeResults = new Map();
554
- for (const descriptor of AUXILIARY_PARTS) {
555
- const result = await mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, newDocumentXml, descriptor);
556
- if (result.mergedIds.size > 0) {
557
- mergeResults.set(descriptor.label, result);
558
- }
559
- }
560
- // Post-merge hook for comment ancillary parts
561
- if (mergeResults.has('comment')) {
562
- await mergeCommentAncillaryParts(originalArchive, resultArchive, mergeResults.get('comment'));
563
- }
555
+ // Step 12b: Merge auxiliary part definitions (footnotes, endnotes, comments).
556
+ // Reconstruction may insert content (deleted in inplace, added in rebuild)
557
+ // whose definitions are missing from the base archive.
558
+ for (const descriptor of AUXILIARY_PARTS) {
559
+ await mergeAuxiliaryPartDefinitions(mergeSourceArchive, resultArchive, newDocumentXml, descriptor);
560
+ }
561
+ // Comment-specific post-pass: walk reply threads via commentsExtended.xml.
562
+ // Gated on root comment IDs in the *result* document (not on what the
563
+ // generic merge appended), so the pass runs even when the original already
564
+ // contains the root and revised only adds replies under it (issue #108).
565
+ const rootCommentIds = collectReferenceIds(newDocumentXml, 'w:commentReference');
566
+ if (rootCommentIds.size > 0) {
567
+ await mergeCommentAncillaryParts(mergeSourceArchive, resultArchive, rootCommentIds);
564
568
  }
565
569
  // Step 13: Save result and compute stats
566
570
  const resultBuffer = await resultArchive.save();
@@ -609,7 +613,7 @@ const AUXILIARY_PARTS = [
609
613
  */
610
614
  function collectReferenceIds(documentXml, referenceTag) {
611
615
  const ids = new Set();
612
- const doc = new DOMParser().parseFromString(documentXml, 'application/xml');
616
+ const doc = parseXml(documentXml);
613
617
  const refs = doc.getElementsByTagName(referenceTag);
614
618
  for (let i = 0; i < refs.length; i++) {
615
619
  const id = refs[i].getAttribute('w:id');
@@ -622,7 +626,7 @@ function collectReferenceIds(documentXml, referenceTag) {
622
626
  * Parse an auxiliary part and extract entry elements by ID.
623
627
  */
624
628
  function parseEntries(xml, entryTag) {
625
- const doc = new DOMParser().parseFromString(xml, 'application/xml');
629
+ const doc = parseXml(xml);
626
630
  const entries = new Map();
627
631
  const elements = doc.getElementsByTagName(entryTag);
628
632
  for (let i = 0; i < elements.length; i++) {
@@ -636,25 +640,27 @@ function parseEntries(xml, entryTag) {
636
640
  const serializer = new XMLSerializer();
637
641
  /**
638
642
  * Merge auxiliary part definitions (footnotes, endnotes, comments) from the
639
- * original archive into the result archive. When inplace mode inserts deleted
640
- * content, the corresponding definitions must exist in the auxiliary part.
643
+ * source archive into the result archive. The source archive is whichever
644
+ * side reconstruction may have introduced references to: original in inplace
645
+ * mode (deleted-but-referenced definitions), revised in rebuild mode
646
+ * (added-but-referenced definitions).
641
647
  */
642
- async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, documentXml, descriptor) {
648
+ async function mergeAuxiliaryPartDefinitions(sourceArchive, resultArchive, documentXml, descriptor) {
643
649
  const result = { mergedIds: new Set(), createdPart: false };
644
650
  const referencedIds = collectReferenceIds(documentXml, descriptor.referenceTag);
645
651
  if (referencedIds.size === 0)
646
652
  return result;
647
- const originalPartXml = await originalArchive.getFile(descriptor.partPath);
648
- if (!originalPartXml)
653
+ const sourcePartXml = await sourceArchive.getFile(descriptor.partPath);
654
+ if (!sourcePartXml)
649
655
  return result;
650
656
  const resultPartXml = await resultArchive.getFile(descriptor.partPath);
651
- const originalParsed = parseEntries(originalPartXml, descriptor.entryTag);
657
+ const sourceParsed = parseEntries(sourcePartXml, descriptor.entryTag);
652
658
  const resultParsed = resultPartXml ? parseEntries(resultPartXml, descriptor.entryTag) : null;
653
659
  // Find missing entries: referenced in document.xml but not in result
654
660
  const missingElements = [];
655
661
  for (const id of referencedIds) {
656
- if (!(resultParsed?.entries.has(id)) && originalParsed.entries.has(id)) {
657
- missingElements.push(originalParsed.entries.get(id));
662
+ if (!(resultParsed?.entries.has(id)) && sourceParsed.entries.has(id)) {
663
+ missingElements.push(sourceParsed.entries.get(id));
658
664
  result.mergedIds.add(id);
659
665
  }
660
666
  }
@@ -672,27 +678,33 @@ async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, doc
672
678
  }
673
679
  }
674
680
  else {
675
- // Create part from scratch: clone root from original, insert missing entries
676
- const newDoc = new DOMParser().parseFromString(originalPartXml, 'application/xml');
681
+ // Create part from scratch: clone root from merge source, drop every
682
+ // non-reserved entry, then append the missing referenced ones.
683
+ // Reserved entries are footnote/endnote separators identified by
684
+ // w:type="separator" / w:type="continuationSeparator" — Word expects
685
+ // them to exist and they don't carry user content. Filtering by w:type
686
+ // (not by magic w:id values) keeps this robust across authoring tools.
687
+ const newDoc = parseXml(sourcePartXml);
677
688
  const rootEl = newDoc.getElementsByTagName(descriptor.rootTag)[0];
678
689
  if (rootEl) {
679
- // Remove all existing entries — we only want the missing ones
680
690
  const existingEntries = rootEl.getElementsByTagName(descriptor.entryTag);
681
691
  const toRemove = [];
682
692
  for (let i = 0; i < existingEntries.length; i++) {
683
- toRemove.push(existingEntries[i]);
693
+ const el = existingEntries[i];
694
+ const type = el.getAttribute('w:type');
695
+ if (type !== 'separator' && type !== 'continuationSeparator') {
696
+ toRemove.push(el);
697
+ }
684
698
  }
685
699
  for (const el of toRemove) {
686
700
  rootEl.removeChild(el);
687
701
  }
688
- // Add back only the missing entries
689
702
  for (const el of missingElements) {
690
703
  const imported = newDoc.importNode(el, true);
691
704
  rootEl.appendChild(imported);
692
705
  }
693
706
  resultArchive.setFile(descriptor.partPath, serializer.serializeToString(newDoc));
694
707
  result.createdPart = true;
695
- // Bootstrap OPC metadata for the newly created part
696
708
  await ensureOpcMetadata(resultArchive, descriptor);
697
709
  }
698
710
  }
@@ -711,7 +723,7 @@ async function ensureOpcMetadata(archive, descriptor) {
711
723
  // 1. Update [Content_Types].xml
712
724
  const ctXml = await archive.getFile('[Content_Types].xml');
713
725
  if (ctXml) {
714
- const ctDoc = new DOMParser().parseFromString(ctXml, 'application/xml');
726
+ const ctDoc = parseXml(ctXml);
715
727
  const typesEl = ctDoc.documentElement;
716
728
  const overrides = typesEl.getElementsByTagNameNS(CT_NS, 'Override');
717
729
  const partName = `/${descriptor.partPath}`;
@@ -734,7 +746,7 @@ async function ensureOpcMetadata(archive, descriptor) {
734
746
  const relsPath = 'word/_rels/document.xml.rels';
735
747
  const relsXml = await archive.getFile(relsPath);
736
748
  if (relsXml) {
737
- const relsDoc = new DOMParser().parseFromString(relsXml, 'application/xml');
749
+ const relsDoc = parseXml(relsXml);
738
750
  const relsEl = relsDoc.documentElement;
739
751
  const existingRels = relsEl.getElementsByTagNameNS(REL_NS, 'Relationship');
740
752
  let found = false;
@@ -764,52 +776,154 @@ async function ensureOpcMetadata(archive, descriptor) {
764
776
  // Comment Ancillary Parts Merging
765
777
  // =============================================================================
766
778
  /**
767
- * After merging comment definitions, copy related entries from
768
- * commentsExtended.xml and people.xml for author fidelity and reply threading.
779
+ * Walk the comment reply graph from each root referenced in the result
780
+ * document, merging reply <w:comment> entries, their commentsExtended.xml
781
+ * threading entries, and people.xml authors. Replies have no
782
+ * <w:commentReference> in document.xml — they're discoverable only via
783
+ * w15:paraIdParent in commentsExtended.xml. Without this expansion, rebuild
784
+ * mode silently drops reply threads (issue #108).
769
785
  */
770
- async function mergeCommentAncillaryParts(originalArchive, resultArchive, commentMergeResult) {
771
- // Collect authors and paraIds from the merged comment entries
772
- const originalCommentsXml = await originalArchive.getFile('word/comments.xml');
773
- if (!originalCommentsXml)
786
+ async function mergeCommentAncillaryParts(sourceArchive, resultArchive, rootCommentIds) {
787
+ const sourceCommentsXml = await sourceArchive.getFile('word/comments.xml');
788
+ if (!sourceCommentsXml)
774
789
  return;
775
- const origDoc = new DOMParser().parseFromString(originalCommentsXml, 'application/xml');
776
- const mergedAuthors = new Set();
777
- const mergedParaIds = new Set();
778
- const commentEls = origDoc.getElementsByTagName('w:comment');
779
- for (let i = 0; i < commentEls.length; i++) {
780
- const el = commentEls[i];
790
+ const sourceDoc = parseXml(sourceCommentsXml);
791
+ // Build full source comment maps. Canonical paraId is the first <w:p>
792
+ // child's w14:paraId, matching getCommentElParaId() in primitives/comments.ts.
793
+ const commentById = new Map();
794
+ const paraIdByCommentId = new Map();
795
+ const commentIdByParaId = new Map();
796
+ const authorByCommentId = new Map();
797
+ const allCommentEls = sourceDoc.getElementsByTagName('w:comment');
798
+ for (let i = 0; i < allCommentEls.length; i++) {
799
+ const el = allCommentEls[i];
781
800
  const id = el.getAttribute('w:id');
782
- if (!id || !commentMergeResult.mergedIds.has(id))
801
+ if (!id)
783
802
  continue;
803
+ commentById.set(id, el);
784
804
  const author = el.getAttribute('w:author');
785
805
  if (author)
786
- mergedAuthors.add(author);
787
- // Collect paraIds from <w:p> children inside the comment
788
- const paras = el.getElementsByTagName('w:p');
789
- for (let j = 0; j < paras.length; j++) {
790
- const p = paras[j];
791
- const paraId = p.getAttribute('w14:paraId');
792
- if (paraId)
793
- mergedParaIds.add(paraId);
794
- }
795
- }
796
- // Merge commentsExtended.xml entries matching merged paraIds
797
- await mergeCommentsExtended(originalArchive, resultArchive, mergedParaIds);
798
- // Merge people.xml entries matching merged authors
799
- await mergePeople(originalArchive, resultArchive, mergedAuthors);
806
+ authorByCommentId.set(id, author);
807
+ const firstP = el.getElementsByTagName('w:p')[0];
808
+ const paraId = firstP?.getAttribute('w14:paraId');
809
+ if (paraId) {
810
+ paraIdByCommentId.set(id, paraId);
811
+ commentIdByParaId.set(paraId, id);
812
+ }
813
+ }
814
+ // Seed inclusion sets from the root IDs that appear in the result document.
815
+ const includedCommentIds = new Set();
816
+ const includedParaIds = new Set();
817
+ const includedAuthors = new Set();
818
+ for (const id of rootCommentIds) {
819
+ if (!commentById.has(id))
820
+ continue;
821
+ includedCommentIds.add(id);
822
+ const pid = paraIdByCommentId.get(id);
823
+ if (pid)
824
+ includedParaIds.add(pid);
825
+ const author = authorByCommentId.get(id);
826
+ if (author)
827
+ includedAuthors.add(author);
828
+ }
829
+ // BFS over commentsExtended.xml's paraIdParent graph from each included
830
+ // root paraId. Skip entries that don't resolve to a real source comment so
831
+ // we never pull in dangling commentEx/people without a backing definition.
832
+ const sourceExtendedXml = await sourceArchive.getFile('word/commentsExtended.xml');
833
+ if (sourceExtendedXml) {
834
+ const exDoc = parseXml(sourceExtendedXml);
835
+ const exEls = exDoc.getElementsByTagName('w15:commentEx');
836
+ const childrenOf = new Map();
837
+ for (let i = 0; i < exEls.length; i++) {
838
+ const ex = exEls[i];
839
+ const childPid = ex.getAttribute('w15:paraId');
840
+ const parentPid = ex.getAttribute('w15:paraIdParent');
841
+ if (!childPid || !parentPid)
842
+ continue;
843
+ const arr = childrenOf.get(parentPid);
844
+ if (arr)
845
+ arr.push(childPid);
846
+ else
847
+ childrenOf.set(parentPid, [childPid]);
848
+ }
849
+ const queue = [...includedParaIds];
850
+ while (queue.length > 0) {
851
+ const pid = queue.shift();
852
+ const children = childrenOf.get(pid);
853
+ if (!children)
854
+ continue;
855
+ for (const childPid of children) {
856
+ if (includedParaIds.has(childPid))
857
+ continue;
858
+ const childCommentId = commentIdByParaId.get(childPid);
859
+ if (!childCommentId)
860
+ continue;
861
+ includedParaIds.add(childPid);
862
+ includedCommentIds.add(childCommentId);
863
+ const author = authorByCommentId.get(childCommentId);
864
+ if (author)
865
+ includedAuthors.add(author);
866
+ queue.push(childPid);
867
+ }
868
+ }
869
+ }
870
+ // Append any reply <w:comment> definitions still missing from result.
871
+ // The generic merge already added roots when needed; we add the replies
872
+ // (and any roots not yet present in the result, defensively).
873
+ await mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds);
874
+ // Merge commentsExtended and people for the expanded set.
875
+ await mergeCommentsExtended(sourceArchive, resultArchive, includedParaIds);
876
+ await mergePeople(sourceArchive, resultArchive, includedAuthors);
800
877
  }
801
- async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaIds) {
878
+ /**
879
+ * Append any source <w:comment> definitions in `includedCommentIds` that
880
+ * aren't already in result/word/comments.xml. Mirrors the append-with-importNode
881
+ * pattern used by mergeCommentsExtended below.
882
+ */
883
+ async function mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds) {
884
+ if (includedCommentIds.size === 0)
885
+ return;
886
+ const resultXml = await resultArchive.getFile('word/comments.xml');
887
+ if (!resultXml) {
888
+ // If result has no comments.xml at all, the generic merge would have
889
+ // bootstrapped it for any included root. Nothing to do here.
890
+ return;
891
+ }
892
+ const resultDoc = parseXml(resultXml);
893
+ const rootEl = resultDoc.documentElement;
894
+ const existingIds = new Set();
895
+ const existing = rootEl.getElementsByTagName('w:comment');
896
+ for (let i = 0; i < existing.length; i++) {
897
+ const id = existing[i].getAttribute('w:id');
898
+ if (id)
899
+ existingIds.add(id);
900
+ }
901
+ let appended = false;
902
+ for (const id of includedCommentIds) {
903
+ if (existingIds.has(id))
904
+ continue;
905
+ const sourceEl = commentById.get(id);
906
+ if (!sourceEl)
907
+ continue;
908
+ rootEl.appendChild(resultDoc.importNode(sourceEl, true));
909
+ appended = true;
910
+ }
911
+ if (appended) {
912
+ resultArchive.setFile('word/comments.xml', serializer.serializeToString(resultDoc));
913
+ }
914
+ }
915
+ async function mergeCommentsExtended(sourceArchive, resultArchive, mergedParaIds) {
802
916
  if (mergedParaIds.size === 0)
803
917
  return;
804
- const originalXml = await originalArchive.getFile('word/commentsExtended.xml');
805
- if (!originalXml)
918
+ const sourceXml = await sourceArchive.getFile('word/commentsExtended.xml');
919
+ if (!sourceXml)
806
920
  return;
807
- const origDoc = new DOMParser().parseFromString(originalXml, 'application/xml');
808
- const origEntries = origDoc.getElementsByTagName('w15:commentEx');
921
+ const sourceDoc = parseXml(sourceXml);
922
+ const sourceEntries = sourceDoc.getElementsByTagName('w15:commentEx');
809
923
  // Collect entries whose paraId matches a merged comment's paragraph
810
924
  const entriesToMerge = [];
811
- for (let i = 0; i < origEntries.length; i++) {
812
- const el = origEntries[i];
925
+ for (let i = 0; i < sourceEntries.length; i++) {
926
+ const el = sourceEntries[i];
813
927
  const paraId = el.getAttribute('w15:paraId');
814
928
  if (paraId && mergedParaIds.has(paraId)) {
815
929
  entriesToMerge.push(el);
@@ -817,11 +931,10 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
817
931
  }
818
932
  if (entriesToMerge.length === 0)
819
933
  return;
820
- let resultXml = await resultArchive.getFile('word/commentsExtended.xml');
934
+ const resultXml = await resultArchive.getFile('word/commentsExtended.xml');
821
935
  if (resultXml) {
822
- const resultDoc = new DOMParser().parseFromString(resultXml, 'application/xml');
936
+ const resultDoc = parseXml(resultXml);
823
937
  const rootEl = resultDoc.documentElement;
824
- // Check existing paraIds to avoid duplicates
825
938
  const existingParaIds = new Set();
826
939
  const existing = rootEl.getElementsByTagName('w15:commentEx');
827
940
  for (let i = 0; i < existing.length; i++) {
@@ -836,21 +949,55 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
836
949
  }
837
950
  }
838
951
  resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(resultDoc));
952
+ return;
839
953
  }
840
- // If commentsExtended.xml doesn't exist in result, we don't create it —
841
- // the file is optional and its absence won't cause crashes.
954
+ // Bootstrap: result lacks commentsExtended.xml but the merged comments
955
+ // depend on it for reply threading / done state. Clone the source's root
956
+ // (preserves namespaces), drop non-matching entries, then add OPC metadata.
957
+ const newDoc = parseXml(sourceXml);
958
+ const newRoot = newDoc.documentElement;
959
+ const allEntries = newRoot.getElementsByTagName('w15:commentEx');
960
+ const toRemove = [];
961
+ for (let i = 0; i < allEntries.length; i++) {
962
+ const el = allEntries[i];
963
+ const paraId = el.getAttribute('w15:paraId');
964
+ if (!paraId || !mergedParaIds.has(paraId))
965
+ toRemove.push(el);
966
+ }
967
+ for (const el of toRemove)
968
+ newRoot.removeChild(el);
969
+ resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(newDoc));
970
+ await ensureOpcMetadata(resultArchive, COMMENTS_EXTENDED_DESCRIPTOR);
842
971
  }
843
- async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
972
+ const COMMENTS_EXTENDED_DESCRIPTOR = {
973
+ label: 'commentsExtended',
974
+ partPath: 'word/commentsExtended.xml',
975
+ referenceTag: '',
976
+ entryTag: 'w15:commentEx',
977
+ rootTag: 'w15:commentsEx',
978
+ contentType: 'application/vnd.ms-word.commentsExtended+xml',
979
+ relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/commentsExtended',
980
+ };
981
+ const PEOPLE_DESCRIPTOR = {
982
+ label: 'people',
983
+ partPath: 'word/people.xml',
984
+ referenceTag: '',
985
+ entryTag: 'w15:person',
986
+ rootTag: 'w15:people',
987
+ contentType: 'application/vnd.ms-word.people+xml',
988
+ relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/people',
989
+ };
990
+ async function mergePeople(sourceArchive, resultArchive, mergedAuthors) {
844
991
  if (mergedAuthors.size === 0)
845
992
  return;
846
- const originalXml = await originalArchive.getFile('word/people.xml');
847
- if (!originalXml)
993
+ const sourceXml = await sourceArchive.getFile('word/people.xml');
994
+ if (!sourceXml)
848
995
  return;
849
- const origDoc = new DOMParser().parseFromString(originalXml, 'application/xml');
850
- const origPersons = origDoc.getElementsByTagName('w15:person');
996
+ const sourceDoc = parseXml(sourceXml);
997
+ const sourcePersons = sourceDoc.getElementsByTagName('w15:person');
851
998
  const personsToMerge = [];
852
- for (let i = 0; i < origPersons.length; i++) {
853
- const el = origPersons[i];
999
+ for (let i = 0; i < sourcePersons.length; i++) {
1000
+ const el = sourcePersons[i];
854
1001
  const author = el.getAttribute('w15:author');
855
1002
  if (author && mergedAuthors.has(author)) {
856
1003
  personsToMerge.push(el);
@@ -858,11 +1005,10 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
858
1005
  }
859
1006
  if (personsToMerge.length === 0)
860
1007
  return;
861
- let resultXml = await resultArchive.getFile('word/people.xml');
1008
+ const resultXml = await resultArchive.getFile('word/people.xml');
862
1009
  if (resultXml) {
863
- const resultDoc = new DOMParser().parseFromString(resultXml, 'application/xml');
1010
+ const resultDoc = parseXml(resultXml);
864
1011
  const rootEl = resultDoc.documentElement;
865
- // Check existing authors to avoid duplicates
866
1012
  const existingAuthors = new Set();
867
1013
  const existing = rootEl.getElementsByTagName('w15:person');
868
1014
  for (let i = 0; i < existing.length; i++) {
@@ -877,9 +1023,24 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
877
1023
  }
878
1024
  }
879
1025
  resultArchive.setFile('word/people.xml', serializer.serializeToString(resultDoc));
1026
+ return;
1027
+ }
1028
+ // Bootstrap: result lacks people.xml. Clone source root (preserves
1029
+ // namespaces), remove non-matching authors, then add OPC metadata.
1030
+ const newDoc = parseXml(sourceXml);
1031
+ const newRoot = newDoc.documentElement;
1032
+ const allPersons = newRoot.getElementsByTagName('w15:person');
1033
+ const toRemove = [];
1034
+ for (let i = 0; i < allPersons.length; i++) {
1035
+ const el = allPersons[i];
1036
+ const author = el.getAttribute('w15:author');
1037
+ if (!author || !mergedAuthors.has(author))
1038
+ toRemove.push(el);
880
1039
  }
881
- // If people.xml doesn't exist in result, we don't create it —
882
- // the file is optional and its absence won't cause crashes.
1040
+ for (const el of toRemove)
1041
+ newRoot.removeChild(el);
1042
+ resultArchive.setFile('word/people.xml', serializer.serializeToString(newDoc));
1043
+ await ensureOpcMetadata(resultArchive, PEOPLE_DESCRIPTOR);
883
1044
  }
884
1045
  /**
885
1046
  * Compute comparison statistics from merged atoms.