npm - @usejunior/docx-core - Versions diffs - 0.8.2 → 0.9.1 - Mend

@usejunior/docx-core 0.8.2 → 0.9.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (41)

package/dist/.tsbuildinfo +1 -1
package/dist/atomizer.d.ts +35 -0
package/dist/atomizer.d.ts.map +1 -1
package/dist/atomizer.js +66 -4
package/dist/atomizer.js.map +1 -1
package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
package/dist/baselines/atomizer/documentReconstructor.js +77 -7
package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -1
package/dist/baselines/atomizer/hierarchicalLcs.js +55 -69
package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -1
package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
package/dist/baselines/atomizer/inPlaceModifier.js +3 -2
package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
package/dist/baselines/atomizer/pipeline.js +249 -88
package/dist/baselines/atomizer/pipeline.js.map +1 -1
package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -1
package/dist/baselines/atomizer/xmlToWmlElement.js +3 -2
package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -1
package/dist/format-detection.d.ts.map +1 -1
package/dist/format-detection.js +2 -1
package/dist/format-detection.js.map +1 -1
package/dist/integration/synthetic-docx-fixture.d.ts +62 -0
package/dist/integration/synthetic-docx-fixture.d.ts.map +1 -0
package/dist/integration/synthetic-docx-fixture.js +171 -0
package/dist/integration/synthetic-docx-fixture.js.map +1 -0
package/dist/move-detection.d.ts.map +1 -1
package/dist/move-detection.js +3 -4
package/dist/move-detection.js.map +1 -1
package/dist/primitives/dom-helpers.d.ts +1 -5
package/dist/primitives/dom-helpers.d.ts.map +1 -1
package/dist/primitives/dom-helpers.js +12 -25
package/dist/primitives/dom-helpers.js.map +1 -1
package/dist/primitives/layout.d.ts.map +1 -1
package/dist/primitives/layout.js +4 -6
package/dist/primitives/layout.js.map +1 -1
package/dist/primitives/xml.d.ts.map +1 -1
package/dist/primitives/xml.js +2 -0
package/dist/primitives/xml.js.map +1 -1
package/package.json +3 -3

package/dist/baselines/atomizer/pipeline.js (changed)

@@ -5,7 +5,8 @@
  * Integrates atomization, LCS comparison, move detection, format detection,
  * and document reconstruction.
  */
-import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
+import { XMLSerializer } from '@xmldom/xmldom';
+import { parseXml } from '../../primitives/xml.js';
 import { DocxArchive } from '../../shared/docx/DocxArchive.js';
 import { DEFAULT_MOVE_DETECTION_SETTINGS, DEFAULT_FORMAT_DETECTION_SETTINGS, CorrelationStatus, } from '../../core-types.js';
 import { atomizeTree, assignParagraphIndices } from '../../atomizer.js';
@@ -529,7 +530,7 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
             comparisonResult = selected;
         }
         else {
-            comparisonResult = runComparisonPass(undefined, 'rebuild');
+            comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
             fallbackReason = 'round_trip_safety_check_failed';
             fallbackDiagnostics = {
                 attempts: failedAttempts,
@@ -537,30 +538,33 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
         }
     }
     else {
-        comparisonResult = runComparisonPass(undefined, 'rebuild');
+        comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
     }
     const { mergedAtoms, newDocumentXml } = comparisonResult;
     // Step 12: Clone appropriate archive and update document.xml.
     // Use the revised archive only for true inplace output.
     const baseArchive = comparisonResult.outputMode === 'inplace' ? revisedArchive : originalArchive;
+    // The merge source is the *opposite* archive from the base: inplace pulls
+    // deleted-but-still-referenced definitions from the original, rebuild pulls
+    // added-but-still-referenced definitions from the revised. Without this,
+    // rebuild output ships dangling references when the original lacks an
+    // auxiliary part that the revised side introduced (issue #94).
+    const mergeSourceArchive = comparisonResult.outputMode === 'inplace' ? originalArchive : revisedArchive;
     const resultArchive = await baseArchive.clone();
     resultArchive.setDocumentXml(newDocumentXml);
-    // Step 12b: For inplace mode, merge auxiliary part definitions (footnotes,
-    // endnotes, comments) from the original document. Inplace reconstruction
-    // inserts deleted content that may reference definitions not present in the
-    // revised archive.
-    if (comparisonResult.outputMode === 'inplace') {
-        const mergeResults = new Map();
-        for (const descriptor of AUXILIARY_PARTS) {
-            const result = await mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, newDocumentXml, descriptor);
-            if (result.mergedIds.size > 0) {
-                mergeResults.set(descriptor.label, result);
-            }
-        }
-        // Post-merge hook for comment ancillary parts
-        if (mergeResults.has('comment')) {
-            await mergeCommentAncillaryParts(originalArchive, resultArchive, mergeResults.get('comment'));
-        }
+    // Step 12b: Merge auxiliary part definitions (footnotes, endnotes, comments).
+    // Reconstruction may insert content (deleted in inplace, added in rebuild)
+    // whose definitions are missing from the base archive.
+    for (const descriptor of AUXILIARY_PARTS) {
+        await mergeAuxiliaryPartDefinitions(mergeSourceArchive, resultArchive, newDocumentXml, descriptor);
+    }
+    // Comment-specific post-pass: walk reply threads via commentsExtended.xml.
+    // Gated on root comment IDs in the *result* document (not on what the
+    // generic merge appended), so the pass runs even when the original already
+    // contains the root and revised only adds replies under it (issue #108).
+    const rootCommentIds = collectReferenceIds(newDocumentXml, 'w:commentReference');
+    if (rootCommentIds.size > 0) {
+        await mergeCommentAncillaryParts(mergeSourceArchive, resultArchive, rootCommentIds);
     }
     // Step 13: Save result and compute stats
     const resultBuffer = await resultArchive.save();
@@ -609,7 +613,7 @@ const AUXILIARY_PARTS = [
  */
 function collectReferenceIds(documentXml, referenceTag) {
     const ids = new Set();
-    const doc = new DOMParser().parseFromString(documentXml, 'application/xml');
+    const doc = parseXml(documentXml);
     const refs = doc.getElementsByTagName(referenceTag);
     for (let i = 0; i < refs.length; i++) {
         const id = refs[i].getAttribute('w:id');
@@ -622,7 +626,7 @@ function collectReferenceIds(documentXml, referenceTag) {
  * Parse an auxiliary part and extract entry elements by ID.
  */
 function parseEntries(xml, entryTag) {
-    const doc = new DOMParser().parseFromString(xml, 'application/xml');
+    const doc = parseXml(xml);
     const entries = new Map();
     const elements = doc.getElementsByTagName(entryTag);
     for (let i = 0; i < elements.length; i++) {
@@ -636,25 +640,27 @@ function parseEntries(xml, entryTag) {
 const serializer = new XMLSerializer();
 /**
  * Merge auxiliary part definitions (footnotes, endnotes, comments) from the
- * original archive into the result archive. When inplace mode inserts deleted
- * content, the corresponding definitions must exist in the auxiliary part.
+ * source archive into the result archive. The source archive is whichever
+ * side reconstruction may have introduced references to: original in inplace
+ * mode (deleted-but-referenced definitions), revised in rebuild mode
+ * (added-but-referenced definitions).
  */
-async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, documentXml, descriptor) {
+async function mergeAuxiliaryPartDefinitions(sourceArchive, resultArchive, documentXml, descriptor) {
     const result = { mergedIds: new Set(), createdPart: false };
     const referencedIds = collectReferenceIds(documentXml, descriptor.referenceTag);
     if (referencedIds.size === 0)
         return result;
-    const originalPartXml = await originalArchive.getFile(descriptor.partPath);
-    if (!originalPartXml)
+    const sourcePartXml = await sourceArchive.getFile(descriptor.partPath);
+    if (!sourcePartXml)
         return result;
     const resultPartXml = await resultArchive.getFile(descriptor.partPath);
-    const originalParsed = parseEntries(originalPartXml, descriptor.entryTag);
+    const sourceParsed = parseEntries(sourcePartXml, descriptor.entryTag);
     const resultParsed = resultPartXml ? parseEntries(resultPartXml, descriptor.entryTag) : null;
     // Find missing entries: referenced in document.xml but not in result
     const missingElements = [];
     for (const id of referencedIds) {
-        if (!(resultParsed?.entries.has(id)) && originalParsed.entries.has(id)) {
-            missingElements.push(originalParsed.entries.get(id));
+        if (!(resultParsed?.entries.has(id)) && sourceParsed.entries.has(id)) {
+            missingElements.push(sourceParsed.entries.get(id));
             result.mergedIds.add(id);
         }
     }
@@ -672,27 +678,33 @@ async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, doc
         }
     }
     else {
-        // Create part from scratch: clone root from original, insert missing entries
-        const newDoc = new DOMParser().parseFromString(originalPartXml, 'application/xml');
+        // Create part from scratch: clone root from merge source, drop every
+        // non-reserved entry, then append the missing referenced ones.
+        // Reserved entries are footnote/endnote separators identified by
+        // w:type="separator" / w:type="continuationSeparator" — Word expects
+        // them to exist and they don't carry user content. Filtering by w:type
+        // (not by magic w:id values) keeps this robust across authoring tools.
+        const newDoc = parseXml(sourcePartXml);
         const rootEl = newDoc.getElementsByTagName(descriptor.rootTag)[0];
         if (rootEl) {
-            // Remove all existing entries — we only want the missing ones
             const existingEntries = rootEl.getElementsByTagName(descriptor.entryTag);
             const toRemove = [];
             for (let i = 0; i < existingEntries.length; i++) {
-                toRemove.push(existingEntries[i]);
+                const el = existingEntries[i];
+                const type = el.getAttribute('w:type');
+                if (type !== 'separator' && type !== 'continuationSeparator') {
+                    toRemove.push(el);
+                }
             }
             for (const el of toRemove) {
                 rootEl.removeChild(el);
             }
-            // Add back only the missing entries
             for (const el of missingElements) {
                 const imported = newDoc.importNode(el, true);
                 rootEl.appendChild(imported);
             }
             resultArchive.setFile(descriptor.partPath, serializer.serializeToString(newDoc));
             result.createdPart = true;
-            // Bootstrap OPC metadata for the newly created part
             await ensureOpcMetadata(resultArchive, descriptor);
         }
     }
@@ -711,7 +723,7 @@ async function ensureOpcMetadata(archive, descriptor) {
     // 1. Update [Content_Types].xml
     const ctXml = await archive.getFile('[Content_Types].xml');
     if (ctXml) {
-        const ctDoc = new DOMParser().parseFromString(ctXml, 'application/xml');
+        const ctDoc = parseXml(ctXml);
         const typesEl = ctDoc.documentElement;
         const overrides = typesEl.getElementsByTagNameNS(CT_NS, 'Override');
         const partName = `/${descriptor.partPath}`;
@@ -734,7 +746,7 @@ async function ensureOpcMetadata(archive, descriptor) {
     const relsPath = 'word/_rels/document.xml.rels';
     const relsXml = await archive.getFile(relsPath);
     if (relsXml) {
-        const relsDoc = new DOMParser().parseFromString(relsXml, 'application/xml');
+        const relsDoc = parseXml(relsXml);
         const relsEl = relsDoc.documentElement;
         const existingRels = relsEl.getElementsByTagNameNS(REL_NS, 'Relationship');
         let found = false;
@@ -764,52 +776,154 @@ async function ensureOpcMetadata(archive, descriptor) {
 // Comment Ancillary Parts Merging
 // =============================================================================
 /**
- * After merging comment definitions, copy related entries from
- * commentsExtended.xml and people.xml for author fidelity and reply threading.
+ * Walk the comment reply graph from each root referenced in the result
+ * document, merging reply <w:comment> entries, their commentsExtended.xml
+ * threading entries, and people.xml authors. Replies have no
+ * <w:commentReference> in document.xml — they're discoverable only via
+ * w15:paraIdParent in commentsExtended.xml. Without this expansion, rebuild
+ * mode silently drops reply threads (issue #108).
  */
-async function mergeCommentAncillaryParts(originalArchive, resultArchive, commentMergeResult) {
-    // Collect authors and paraIds from the merged comment entries
-    const originalCommentsXml = await originalArchive.getFile('word/comments.xml');
-    if (!originalCommentsXml)
+async function mergeCommentAncillaryParts(sourceArchive, resultArchive, rootCommentIds) {
+    const sourceCommentsXml = await sourceArchive.getFile('word/comments.xml');
+    if (!sourceCommentsXml)
         return;
-    const origDoc = new DOMParser().parseFromString(originalCommentsXml, 'application/xml');
-    const mergedAuthors = new Set();
-    const mergedParaIds = new Set();
-    const commentEls = origDoc.getElementsByTagName('w:comment');
-    for (let i = 0; i < commentEls.length; i++) {
-        const el = commentEls[i];
+    const sourceDoc = parseXml(sourceCommentsXml);
+    // Build full source comment maps. Canonical paraId is the first <w:p>
+    // child's w14:paraId, matching getCommentElParaId() in primitives/comments.ts.
+    const commentById = new Map();
+    const paraIdByCommentId = new Map();
+    const commentIdByParaId = new Map();
+    const authorByCommentId = new Map();
+    const allCommentEls = sourceDoc.getElementsByTagName('w:comment');
+    for (let i = 0; i < allCommentEls.length; i++) {
+        const el = allCommentEls[i];
         const id = el.getAttribute('w:id');
-        if (!id || !commentMergeResult.mergedIds.has(id))
+        if (!id)
             continue;
+        commentById.set(id, el);
         const author = el.getAttribute('w:author');
         if (author)
-            mergedAuthors.add(author);
-        // Collect paraIds from <w:p> children inside the comment
-        const paras = el.getElementsByTagName('w:p');
-        for (let j = 0; j < paras.length; j++) {
-            const p = paras[j];
-            const paraId = p.getAttribute('w14:paraId');
-            if (paraId)
-                mergedParaIds.add(paraId);
-        }
-    }
-    // Merge commentsExtended.xml entries matching merged paraIds
-    await mergeCommentsExtended(originalArchive, resultArchive, mergedParaIds);
-    // Merge people.xml entries matching merged authors
-    await mergePeople(originalArchive, resultArchive, mergedAuthors);
+            authorByCommentId.set(id, author);
+        const firstP = el.getElementsByTagName('w:p')[0];
+        const paraId = firstP?.getAttribute('w14:paraId');
+        if (paraId) {
+            paraIdByCommentId.set(id, paraId);
+            commentIdByParaId.set(paraId, id);
+        }
+    }
+    // Seed inclusion sets from the root IDs that appear in the result document.
+    const includedCommentIds = new Set();
+    const includedParaIds = new Set();
+    const includedAuthors = new Set();
+    for (const id of rootCommentIds) {
+        if (!commentById.has(id))
+            continue;
+        includedCommentIds.add(id);
+        const pid = paraIdByCommentId.get(id);
+        if (pid)
+            includedParaIds.add(pid);
+        const author = authorByCommentId.get(id);
+        if (author)
+            includedAuthors.add(author);
+    }
+    // BFS over commentsExtended.xml's paraIdParent graph from each included
+    // root paraId. Skip entries that don't resolve to a real source comment so
+    // we never pull in dangling commentEx/people without a backing definition.
+    const sourceExtendedXml = await sourceArchive.getFile('word/commentsExtended.xml');
+    if (sourceExtendedXml) {
+        const exDoc = parseXml(sourceExtendedXml);
+        const exEls = exDoc.getElementsByTagName('w15:commentEx');
+        const childrenOf = new Map();
+        for (let i = 0; i < exEls.length; i++) {
+            const ex = exEls[i];
+            const childPid = ex.getAttribute('w15:paraId');
+            const parentPid = ex.getAttribute('w15:paraIdParent');
+            if (!childPid || !parentPid)
+                continue;
+            const arr = childrenOf.get(parentPid);
+            if (arr)
+                arr.push(childPid);
+            else
+                childrenOf.set(parentPid, [childPid]);
+        }
+        const queue = [...includedParaIds];
+        while (queue.length > 0) {
+            const pid = queue.shift();
+            const children = childrenOf.get(pid);
+            if (!children)
+                continue;
+            for (const childPid of children) {
+                if (includedParaIds.has(childPid))
+                    continue;
+                const childCommentId = commentIdByParaId.get(childPid);
+                if (!childCommentId)
+                    continue;
+                includedParaIds.add(childPid);
+                includedCommentIds.add(childCommentId);
+                const author = authorByCommentId.get(childCommentId);
+                if (author)
+                    includedAuthors.add(author);
+                queue.push(childPid);
+            }
+        }
+    }
+    // Append any reply <w:comment> definitions still missing from result.
+    // The generic merge already added roots when needed; we add the replies
+    // (and any roots not yet present in the result, defensively).
+    await mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds);
+    // Merge commentsExtended and people for the expanded set.
+    await mergeCommentsExtended(sourceArchive, resultArchive, includedParaIds);
+    await mergePeople(sourceArchive, resultArchive, includedAuthors);
 }
-async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaIds) {
+/**
+ * Append any source <w:comment> definitions in `includedCommentIds` that
+ * aren't already in result/word/comments.xml. Mirrors the append-with-importNode
+ * pattern used by mergeCommentsExtended below.
+ */
+async function mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds) {
+    if (includedCommentIds.size === 0)
+        return;
+    const resultXml = await resultArchive.getFile('word/comments.xml');
+    if (!resultXml) {
+        // If result has no comments.xml at all, the generic merge would have
+        // bootstrapped it for any included root. Nothing to do here.
+        return;
+    }
+    const resultDoc = parseXml(resultXml);
+    const rootEl = resultDoc.documentElement;
+    const existingIds = new Set();
+    const existing = rootEl.getElementsByTagName('w:comment');
+    for (let i = 0; i < existing.length; i++) {
+        const id = existing[i].getAttribute('w:id');
+        if (id)
+            existingIds.add(id);
+    }
+    let appended = false;
+    for (const id of includedCommentIds) {
+        if (existingIds.has(id))
+            continue;
+        const sourceEl = commentById.get(id);
+        if (!sourceEl)
+            continue;
+        rootEl.appendChild(resultDoc.importNode(sourceEl, true));
+        appended = true;
+    }
+    if (appended) {
+        resultArchive.setFile('word/comments.xml', serializer.serializeToString(resultDoc));
+    }
+}
+async function mergeCommentsExtended(sourceArchive, resultArchive, mergedParaIds) {
     if (mergedParaIds.size === 0)
         return;
-    const originalXml = await originalArchive.getFile('word/commentsExtended.xml');
-    if (!originalXml)
+    const sourceXml = await sourceArchive.getFile('word/commentsExtended.xml');
+    if (!sourceXml)
         return;
-    const origDoc = new DOMParser().parseFromString(originalXml, 'application/xml');
-    const origEntries = origDoc.getElementsByTagName('w15:commentEx');
+    const sourceDoc = parseXml(sourceXml);
+    const sourceEntries = sourceDoc.getElementsByTagName('w15:commentEx');
     // Collect entries whose paraId matches a merged comment's paragraph
     const entriesToMerge = [];
-    for (let i = 0; i < origEntries.length; i++) {
-        const el = origEntries[i];
+    for (let i = 0; i < sourceEntries.length; i++) {
+        const el = sourceEntries[i];
         const paraId = el.getAttribute('w15:paraId');
         if (paraId && mergedParaIds.has(paraId)) {
             entriesToMerge.push(el);
@@ -817,11 +931,10 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
     }
     if (entriesToMerge.length === 0)
         return;
-    let resultXml = await resultArchive.getFile('word/commentsExtended.xml');
+    const resultXml = await resultArchive.getFile('word/commentsExtended.xml');
     if (resultXml) {
-        const resultDoc = new DOMParser().parseFromString(resultXml, 'application/xml');
+        const resultDoc = parseXml(resultXml);
         const rootEl = resultDoc.documentElement;
-        // Check existing paraIds to avoid duplicates
         const existingParaIds = new Set();
         const existing = rootEl.getElementsByTagName('w15:commentEx');
         for (let i = 0; i < existing.length; i++) {
@@ -836,21 +949,55 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
             }
         }
         resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(resultDoc));
+        return;
     }
-    // If commentsExtended.xml doesn't exist in result, we don't create it —
-    // the file is optional and its absence won't cause crashes.
+    // Bootstrap: result lacks commentsExtended.xml but the merged comments
+    // depend on it for reply threading / done state. Clone the source's root
+    // (preserves namespaces), drop non-matching entries, then add OPC metadata.
+    const newDoc = parseXml(sourceXml);
+    const newRoot = newDoc.documentElement;
+    const allEntries = newRoot.getElementsByTagName('w15:commentEx');
+    const toRemove = [];
+    for (let i = 0; i < allEntries.length; i++) {
+        const el = allEntries[i];
+        const paraId = el.getAttribute('w15:paraId');
+        if (!paraId || !mergedParaIds.has(paraId))
+            toRemove.push(el);
+    }
+    for (const el of toRemove)
+        newRoot.removeChild(el);
+    resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(newDoc));
+    await ensureOpcMetadata(resultArchive, COMMENTS_EXTENDED_DESCRIPTOR);
 }
-async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
+const COMMENTS_EXTENDED_DESCRIPTOR = {
+    label: 'commentsExtended',
+    partPath: 'word/commentsExtended.xml',
+    referenceTag: '',
+    entryTag: 'w15:commentEx',
+    rootTag: 'w15:commentsEx',
+    contentType: 'application/vnd.ms-word.commentsExtended+xml',
+    relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/commentsExtended',
+};
+const PEOPLE_DESCRIPTOR = {
+    label: 'people',
+    partPath: 'word/people.xml',
+    referenceTag: '',
+    entryTag: 'w15:person',
+    rootTag: 'w15:people',
+    contentType: 'application/vnd.ms-word.people+xml',
+    relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/people',
+};
+async function mergePeople(sourceArchive, resultArchive, mergedAuthors) {
     if (mergedAuthors.size === 0)
         return;
-    const originalXml = await originalArchive.getFile('word/people.xml');
-    if (!originalXml)
+    const sourceXml = await sourceArchive.getFile('word/people.xml');
+    if (!sourceXml)
         return;
-    const origDoc = new DOMParser().parseFromString(originalXml, 'application/xml');
-    const origPersons = origDoc.getElementsByTagName('w15:person');
+    const sourceDoc = parseXml(sourceXml);
+    const sourcePersons = sourceDoc.getElementsByTagName('w15:person');
     const personsToMerge = [];
-    for (let i = 0; i < origPersons.length; i++) {
-        const el = origPersons[i];
+    for (let i = 0; i < sourcePersons.length; i++) {
+        const el = sourcePersons[i];
         const author = el.getAttribute('w15:author');
         if (author && mergedAuthors.has(author)) {
             personsToMerge.push(el);
@@ -858,11 +1005,10 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
     }
     if (personsToMerge.length === 0)
         return;
-    let resultXml = await resultArchive.getFile('word/people.xml');
+    const resultXml = await resultArchive.getFile('word/people.xml');
     if (resultXml) {
-        const resultDoc = new DOMParser().parseFromString(resultXml, 'application/xml');
+        const resultDoc = parseXml(resultXml);
         const rootEl = resultDoc.documentElement;
-        // Check existing authors to avoid duplicates
         const existingAuthors = new Set();
         const existing = rootEl.getElementsByTagName('w15:person');
         for (let i = 0; i < existing.length; i++) {
@@ -877,9 +1023,24 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
             }
         }
         resultArchive.setFile('word/people.xml', serializer.serializeToString(resultDoc));
+        return;
+    }
+    // Bootstrap: result lacks people.xml. Clone source root (preserves
+    // namespaces), remove non-matching authors, then add OPC metadata.
+    const newDoc = parseXml(sourceXml);
+    const newRoot = newDoc.documentElement;
+    const allPersons = newRoot.getElementsByTagName('w15:person');
+    const toRemove = [];
+    for (let i = 0; i < allPersons.length; i++) {
+        const el = allPersons[i];
+        const author = el.getAttribute('w15:author');
+        if (!author || !mergedAuthors.has(author))
+            toRemove.push(el);
     }
-    // If people.xml doesn't exist in result, we don't create it —
-    // the file is optional and its absence won't cause crashes.
+    for (const el of toRemove)
+        newRoot.removeChild(el);
+    resultArchive.setFile('word/people.xml', serializer.serializeToString(newDoc));
+    await ensureOpcMetadata(resultArchive, PEOPLE_DESCRIPTOR);
 }
 /**
  * Compute comparison statistics from merged atoms.