@usejunior/docx-core 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/atomizer.d.ts +49 -0
  3. package/dist/atomizer.d.ts.map +1 -1
  4. package/dist/atomizer.js +90 -0
  5. package/dist/atomizer.js.map +1 -1
  6. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
  7. package/dist/baselines/atomizer/documentReconstructor.js +289 -92
  8. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
  9. package/dist/baselines/atomizer/formattingFidelity.d.ts +99 -0
  10. package/dist/baselines/atomizer/formattingFidelity.d.ts.map +1 -0
  11. package/dist/baselines/atomizer/formattingFidelity.js +449 -0
  12. package/dist/baselines/atomizer/formattingFidelity.js.map +1 -0
  13. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts +37 -0
  14. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts.map +1 -0
  15. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js +189 -0
  16. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js.map +1 -0
  17. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts +74 -0
  18. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts.map +1 -0
  19. package/dist/baselines/atomizer/inPlaceModifier-containers.js +171 -0
  20. package/dist/baselines/atomizer/inPlaceModifier-containers.js.map +1 -0
  21. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts +88 -0
  22. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts.map +1 -0
  23. package/dist/baselines/atomizer/inPlaceModifier-deletion.js +326 -0
  24. package/dist/baselines/atomizer/inPlaceModifier-deletion.js.map +1 -0
  25. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts +85 -0
  26. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts.map +1 -0
  27. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js +402 -0
  28. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js.map +1 -0
  29. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts +39 -0
  30. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts.map +1 -0
  31. package/dist/baselines/atomizer/inPlaceModifier-presplit.js +265 -0
  32. package/dist/baselines/atomizer/inPlaceModifier-presplit.js.map +1 -0
  33. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts +62 -0
  34. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts.map +1 -0
  35. package/dist/baselines/atomizer/inPlaceModifier-shared.js +139 -0
  36. package/dist/baselines/atomizer/inPlaceModifier-shared.js.map +1 -0
  37. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts +189 -0
  38. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts.map +1 -0
  39. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js +427 -0
  40. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js.map +1 -0
  41. package/dist/baselines/atomizer/inPlaceModifier.d.ts +6 -290
  42. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
  43. package/dist/baselines/atomizer/inPlaceModifier.js +23 -1828
  44. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
  45. package/dist/baselines/atomizer/pipeline.d.ts +76 -1
  46. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
  47. package/dist/baselines/atomizer/pipeline.js +445 -108
  48. package/dist/baselines/atomizer/pipeline.js.map +1 -1
  49. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
  50. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +56 -160
  51. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
  52. package/dist/compare-types.d.ts +151 -0
  53. package/dist/compare-types.d.ts.map +1 -0
  54. package/dist/compare-types.js +2 -0
  55. package/dist/compare-types.js.map +1 -0
  56. package/dist/core-types.d.ts +5 -1
  57. package/dist/core-types.d.ts.map +1 -1
  58. package/dist/core-types.js +5 -1
  59. package/dist/core-types.js.map +1 -1
  60. package/dist/footnotes.d.ts +8 -3
  61. package/dist/footnotes.d.ts.map +1 -1
  62. package/dist/footnotes.js +8 -3
  63. package/dist/footnotes.js.map +1 -1
  64. package/dist/index.d.ts +6 -150
  65. package/dist/index.d.ts.map +1 -1
  66. package/dist/index.js +6 -0
  67. package/dist/index.js.map +1 -1
  68. package/dist/integration/libreoffice-oracle.d.ts +41 -0
  69. package/dist/integration/libreoffice-oracle.d.ts.map +1 -0
  70. package/dist/integration/libreoffice-oracle.js +282 -0
  71. package/dist/integration/libreoffice-oracle.js.map +1 -0
  72. package/dist/integration/synthetic-docx-fixture.d.ts +62 -0
  73. package/dist/integration/synthetic-docx-fixture.d.ts.map +1 -0
  74. package/dist/integration/synthetic-docx-fixture.js +171 -0
  75. package/dist/integration/synthetic-docx-fixture.js.map +1 -0
  76. package/dist/primitives/accept_changes.d.ts +2 -2
  77. package/dist/primitives/accept_changes.d.ts.map +1 -1
  78. package/dist/primitives/accept_changes.js +24 -79
  79. package/dist/primitives/accept_changes.js.map +1 -1
  80. package/dist/primitives/comments.d.ts +12 -3
  81. package/dist/primitives/comments.d.ts.map +1 -1
  82. package/dist/primitives/comments.js +374 -97
  83. package/dist/primitives/comments.js.map +1 -1
  84. package/dist/primitives/content_fingerprint.d.ts +29 -0
  85. package/dist/primitives/content_fingerprint.d.ts.map +1 -0
  86. package/dist/primitives/content_fingerprint.js +63 -0
  87. package/dist/primitives/content_fingerprint.js.map +1 -0
  88. package/dist/primitives/document.d.ts +56 -15
  89. package/dist/primitives/document.d.ts.map +1 -1
  90. package/dist/primitives/document.js +303 -32
  91. package/dist/primitives/document.js.map +1 -1
  92. package/dist/primitives/document_view-comments.d.ts +18 -0
  93. package/dist/primitives/document_view-comments.d.ts.map +1 -0
  94. package/dist/primitives/document_view-comments.js +159 -0
  95. package/dist/primitives/document_view-comments.js.map +1 -0
  96. package/dist/primitives/document_view-headings.d.ts +45 -0
  97. package/dist/primitives/document_view-headings.d.ts.map +1 -0
  98. package/dist/primitives/document_view-headings.js +247 -0
  99. package/dist/primitives/document_view-headings.js.map +1 -0
  100. package/dist/primitives/document_view-styles.d.ts +11 -0
  101. package/dist/primitives/document_view-styles.d.ts.map +1 -0
  102. package/dist/primitives/document_view-styles.js +104 -0
  103. package/dist/primitives/document_view-styles.js.map +1 -0
  104. package/dist/primitives/document_view-toon.d.ts +37 -0
  105. package/dist/primitives/document_view-toon.d.ts.map +1 -0
  106. package/dist/primitives/document_view-toon.js +199 -0
  107. package/dist/primitives/document_view-toon.js.map +1 -0
  108. package/dist/primitives/document_view-types.d.ts +137 -0
  109. package/dist/primitives/document_view-types.d.ts.map +1 -0
  110. package/dist/primitives/document_view-types.js +2 -0
  111. package/dist/primitives/document_view-types.js.map +1 -0
  112. package/dist/primitives/document_view.d.ts +8 -106
  113. package/dist/primitives/document_view.d.ts.map +1 -1
  114. package/dist/primitives/document_view.js +134 -301
  115. package/dist/primitives/document_view.js.map +1 -1
  116. package/dist/primitives/dom-helpers.d.ts +9 -0
  117. package/dist/primitives/dom-helpers.d.ts.map +1 -1
  118. package/dist/primitives/dom-helpers.js +10 -1
  119. package/dist/primitives/dom-helpers.js.map +1 -1
  120. package/dist/primitives/footnotes.d.ts +4 -3
  121. package/dist/primitives/footnotes.d.ts.map +1 -1
  122. package/dist/primitives/footnotes.js +232 -44
  123. package/dist/primitives/footnotes.js.map +1 -1
  124. package/dist/primitives/formatting_tags.d.ts +6 -0
  125. package/dist/primitives/formatting_tags.d.ts.map +1 -1
  126. package/dist/primitives/formatting_tags.js +6 -1
  127. package/dist/primitives/formatting_tags.js.map +1 -1
  128. package/dist/primitives/index.d.ts +6 -0
  129. package/dist/primitives/index.d.ts.map +1 -1
  130. package/dist/primitives/index.js +5 -0
  131. package/dist/primitives/index.js.map +1 -1
  132. package/dist/primitives/layout.d.ts +4 -3
  133. package/dist/primitives/layout.d.ts.map +1 -1
  134. package/dist/primitives/layout.js +32 -3
  135. package/dist/primitives/layout.js.map +1 -1
  136. package/dist/primitives/merge_runs.d.ts +21 -3
  137. package/dist/primitives/merge_runs.d.ts.map +1 -1
  138. package/dist/primitives/merge_runs.js +32 -10
  139. package/dist/primitives/merge_runs.js.map +1 -1
  140. package/dist/primitives/namespaces.d.ts +6 -0
  141. package/dist/primitives/namespaces.d.ts.map +1 -1
  142. package/dist/primitives/namespaces.js +9 -0
  143. package/dist/primitives/namespaces.js.map +1 -1
  144. package/dist/primitives/reject_changes.d.ts +2 -2
  145. package/dist/primitives/reject_changes.d.ts.map +1 -1
  146. package/dist/primitives/reject_changes.js +24 -81
  147. package/dist/primitives/reject_changes.js.map +1 -1
  148. package/dist/primitives/semantic_tags.d.ts +7 -0
  149. package/dist/primitives/semantic_tags.d.ts.map +1 -1
  150. package/dist/primitives/semantic_tags.js +21 -3
  151. package/dist/primitives/semantic_tags.js.map +1 -1
  152. package/dist/primitives/serialize_html.d.ts +36 -0
  153. package/dist/primitives/serialize_html.d.ts.map +1 -0
  154. package/dist/primitives/serialize_html.js +393 -0
  155. package/dist/primitives/serialize_html.js.map +1 -0
  156. package/dist/primitives/serialize_markdown.d.ts +16 -0
  157. package/dist/primitives/serialize_markdown.d.ts.map +1 -0
  158. package/dist/primitives/serialize_markdown.js +300 -0
  159. package/dist/primitives/serialize_markdown.js.map +1 -0
  160. package/dist/primitives/serialize_plaintext.d.ts +15 -0
  161. package/dist/primitives/serialize_plaintext.d.ts.map +1 -0
  162. package/dist/primitives/serialize_plaintext.js +154 -0
  163. package/dist/primitives/serialize_plaintext.js.map +1 -0
  164. package/dist/primitives/styles.js +22 -22
  165. package/dist/primitives/styles.js.map +1 -1
  166. package/dist/primitives/tables.d.ts.map +1 -1
  167. package/dist/primitives/tables.js +13 -3
  168. package/dist/primitives/tables.js.map +1 -1
  169. package/dist/primitives/text.d.ts +2 -1
  170. package/dist/primitives/text.d.ts.map +1 -1
  171. package/dist/primitives/text.js +116 -12
  172. package/dist/primitives/text.js.map +1 -1
  173. package/dist/primitives/track-changes-emitter.d.ts +139 -0
  174. package/dist/primitives/track-changes-emitter.d.ts.map +1 -0
  175. package/dist/primitives/track-changes-emitter.js +241 -0
  176. package/dist/primitives/track-changes-emitter.js.map +1 -0
  177. package/dist/primitives/xml-helpers.d.ts +29 -0
  178. package/dist/primitives/xml-helpers.d.ts.map +1 -0
  179. package/dist/primitives/xml-helpers.js +35 -0
  180. package/dist/primitives/xml-helpers.js.map +1 -0
  181. package/dist/shared/ooxml/namespaces.d.ts +4 -1
  182. package/dist/shared/ooxml/namespaces.d.ts.map +1 -1
  183. package/dist/shared/ooxml/namespaces.js +4 -1
  184. package/dist/shared/ooxml/namespaces.js.map +1 -1
  185. package/package.json +7 -6
@@ -215,19 +215,140 @@ function buildFailureSummary(failureDetails) {
215
215
  }
216
216
  return Object.keys(summary).length > 0 ? summary : undefined;
217
217
  }
218
+ // Declared above splitStories so the function body never observes an
219
+ // uninitialized binding under circular imports.
220
+ const serializer = new XMLSerializer();
221
/**
 * Split a docx into per-story XML fragments for field-closure validation.
 *
 * The main document body is always the first story. Every `<w:footnote>` /
 * `<w:endnote>` entry found in the supplied sidecar XMLs becomes a story of
 * its own: a complex field whose `begin` and `end` markers straddle stories
 * breaks Word's field state machine, so each entry is validated independently
 * rather than treating the whole `footnotes.xml`/`endnotes.xml` as one stream.
 *
 * Sidecars are passed as arrays (one slot per source archive) so callers can
 * screen the union of entries from every archive that may contribute to the
 * final result. No de-duplication is performed: the same `w:id` present in
 * several archives yields one story per occurrence, which only costs
 * redundant validation work.
 *
 * Header/footer stories are not covered.
 *
 * @param {string} documentXml - Serialized main document part; returned
 *   verbatim as the `document` story.
 * @param {Array<string|null|undefined>|null} [footnotesXmls] - Footnote
 *   sidecar XMLs. Falsy slots are skipped; an omitted or nullish list is
 *   treated as empty.
 * @param {Array<string|null|undefined>|null} [endnotesXmls] - Endnote sidecar
 *   XMLs, same conventions as `footnotesXmls`.
 * @returns {Array<{label: string, xml: string}>} Stories in order: document,
 *   then footnote entries, then endnote entries. Entry labels have the form
 *   `<kind>[<sidecar index>]:<id>`.
 *
 * @conformance ECMA-376 edition 5, Part 4 § 17.16.5
 * @see https://github.com/UseJunior/safe-docx/issues/212
 */
export function splitStories(documentXml, footnotesXmls, endnotesXmls) {
    const stories = [{ label: 'document', xml: documentXml }];
    const collectEntries = (sidecars, entryTag, labelPrefix) => {
        // A caller with no sidecars may omit the list entirely; treat that the
        // same as an empty list instead of throwing on `.length`.
        const sidecarList = sidecars ?? [];
        for (let s = 0; s < sidecarList.length; s++) {
            const sidecarXml = sidecarList[s];
            // Archives without this part contribute a null/undefined slot.
            if (!sidecarXml) {
                continue;
            }
            const entries = parseXml(sidecarXml).getElementsByTagName(entryTag);
            for (let i = 0; i < entries.length; i++) {
                const entry = entries[i];
                // Some DOM implementations return '' (not null) for a missing
                // attribute, so fall back to the positional index on any
                // empty id to keep labels distinguishable.
                const id = entry.getAttribute('w:id') || String(i);
                stories.push({
                    label: `${labelPrefix}[${s}]:${id}`,
                    xml: serializer.serializeToString(entry),
                });
            }
        }
    };
    collectEntries(footnotesXmls, 'w:footnote', 'footnote');
    collectEntries(endnotesXmls, 'w:endnote', 'endnote');
    return stories;
}
267
+ /**
268
+ * Validate field structure integrity across one or more document stories.
269
+ *
270
+ * Enforces three constraints on complex fields **per story**:
271
+ * 1. `w:fldChar` begin/end count balance within the story.
272
+ * 2. Every `w:instrText` AND `w:delInstrText` sits inside an open field body
273
+ * (between `begin` and `separate`). Orphaned instruction text renders as
274
+ * literal text in Word.
275
+ * 3. `w:delInstrText` is nested inside a `<w:del>` ancestor (DeletedFieldCode
276
+ * schema constraint), and conversely `w:fldChar` is NEVER inside `<w:del>`
277
+ * (Word treats this as fatal and discards the field state machine).
278
+ *
279
+ * Called on both pre-accept/reject combined XML (with track-change wrappers)
280
+ * and on post-accept/reject XML (wrappers removed). Both cases must satisfy the
281
+ * field placement check; constraint (3) is vacuous post-accept/reject.
282
+ *
283
+ * Accepts either a single XML string (legacy single-story call) or an array of
284
+ * `FieldStory` fragments. Stories are validated independently and short-circuit
285
+ * on the first failure.
286
+ *
287
+ * @conformance ECMA-376 edition 5, Part 4 § 17.16.5
288
+ */
218
289
  /**
219
- * Validate field structure integrity in document XML.
290
+ * Targeted check for one of the constraints above: `w:fldChar` MUST NOT appear
291
+ * inside any `<w:del>` element. Word treats this violation as fatal — the
292
+ * field state machine is discarded and the field renders as literal-text
293
+ * fallback.
294
+ *
295
+ * Used as a combined-output safety gate alongside the per-projection
296
+ * `validateFieldStructure` checks. Kept narrower than the full structural
297
+ * validation so that legacy shapes (e.g. `delInstrText` inside `<w:moveFrom>`)
298
+ * don't trigger fallback when the inplace candidate is otherwise sound on its
299
+ * accept/reject projections.
220
300
  *
221
- * Checks that fldChar begin/end are balanced and that w:instrText only
222
- * appears inside a proper field sequence (between begin and separate).
223
- * Orphaned instrText elements render as visible text in Word.
301
+ * @conformance ECMA-376 edition 5, Part 4 § 17.16.5
302
+ * @see https://github.com/UseJunior/safe-docx/issues/217
224
303
  */
225
- function validateFieldStructure(documentXml) {
304
/**
 * Report whether any `w:fldChar` element has a `<w:del>` ancestor.
 *
 * The parsed root's own tag is not inspected; only its descendants are
 * walked, in document order, stopping at the first offending `w:fldChar`.
 *
 * @param {string} documentXml - XML handed to `parseDocumentXml`.
 * @returns {boolean} `true` when at least one `w:fldChar` sits inside a
 *   `<w:del>` element, `false` otherwise.
 */
export function hasFldCharInsideDel(documentXml) {
    const ELEMENT_NODE = 1;
    // Returns true as soon as a w:fldChar is met while `withinDel` is set.
    const subtreeViolates = (parent, withinDel) => {
        for (let node = parent.firstChild; node; node = node.nextSibling) {
            if (node.nodeType !== ELEMENT_NODE) {
                continue;
            }
            const name = node.tagName;
            if (withinDel && name === 'w:fldChar') {
                return true;
            }
            // Entering a <w:del> switches the flag on for its whole subtree;
            // every other element simply inherits the current state.
            if (subtreeViolates(node, withinDel || name === 'w:del')) {
                return true;
            }
        }
        return false;
    };
    return subtreeViolates(parseDocumentXml(documentXml), false);
}
336
/**
 * Validate complex-field structure for one story or a list of stories.
 *
 * A plain string is validated as a single story. Otherwise `input` is
 * iterated and each element's `xml` is validated on its own; iteration stops
 * at the first story that fails.
 *
 * @param {string|Iterable<{xml: string}>} input - A single XML string, or
 *   story fragments carrying an `xml` property.
 * @returns {boolean} `true` when every story passes
 *   `validateFieldStructureForStory` (vacuously `true` for an empty list).
 */
export function validateFieldStructure(input) {
    // Normalise the single-string form into a one-story list so both call
    // shapes share the same loop.
    const stories = typeof input === 'string' ? [{ xml: input }] : input;
    for (const { xml } of stories) {
        if (!validateFieldStructureForStory(xml)) {
            return false;
        }
    }
    return true;
}
346
+ function validateFieldStructureForStory(documentXml) {
226
347
  const root = parseDocumentXml(documentXml);
227
- // Walk the document in order, tracking field nesting
228
348
  const allFldChars = findAllByTagName(root, 'w:fldChar');
229
349
  const allInstrTexts = findAllByTagName(root, 'w:instrText');
230
- // Quick balance check
350
+ const allDelInstrTexts = findAllByTagName(root, 'w:delInstrText');
351
+ // Constraint (1): global fldChar begin/end balance.
231
352
  let begins = 0;
232
353
  let ends = 0;
233
354
  for (const fc of allFldChars) {
@@ -239,19 +360,33 @@ function validateFieldStructure(documentXml) {
239
360
  }
240
361
  if (begins !== ends)
241
362
  return false;
242
- // Check that instrText elements are inside a field (between begin and separate).
243
- // Walk all elements in document order using a recursive scan.
244
- if (allInstrTexts.length === 0)
245
- return true; // No instrText, nothing to validate
246
- // Depth-first scan to check instrText placement
363
+ if (allFldChars.length === 0 &&
364
+ allInstrTexts.length === 0 &&
365
+ allDelInstrTexts.length === 0) {
366
+ return true;
367
+ }
368
+ // Depth-first scan tracking field nesting (for constraint 2) and <w:del>
369
+ // ancestor nesting (for constraint 3).
247
370
  let depth = 0;
248
- const pastSeparatorAtDepth = []; // track separator state per depth
371
+ const pastSeparatorAtDepth = [];
372
+ let insideDelDepth = 0;
249
373
  function scan(node) {
250
374
  for (let child = node.firstChild; child; child = child.nextSibling) {
251
375
  if (child.nodeType !== 1)
252
- continue; // skip non-elements
376
+ continue;
253
377
  const el = child;
254
- if (el.tagName === 'w:fldChar') {
378
+ const tag = el.tagName;
379
+ if (tag === 'w:del') {
380
+ insideDelDepth++;
381
+ const ok = scan(el);
382
+ insideDelDepth--;
383
+ if (!ok)
384
+ return false;
385
+ continue;
386
+ }
387
+ if (tag === 'w:fldChar') {
388
+ if (insideDelDepth > 0)
389
+ return false;
255
390
  const type = el.getAttribute('w:fldCharType');
256
391
  if (type === 'begin') {
257
392
  depth++;
@@ -266,8 +401,13 @@ function validateFieldStructure(documentXml) {
266
401
  depth--;
267
402
  }
268
403
  }
269
- else if (el.tagName === 'w:instrText') {
270
- // instrText must be inside a field (depth > 0) and before the separator
404
+ else if (tag === 'w:instrText') {
405
+ if (depth === 0 || pastSeparatorAtDepth[depth])
406
+ return false;
407
+ }
408
+ else if (tag === 'w:delInstrText') {
409
+ if (insideDelDepth === 0)
410
+ return false;
271
411
  if (depth === 0 || pastSeparatorAtDepth[depth])
272
412
  return false;
273
413
  }
@@ -278,7 +418,7 @@ function validateFieldStructure(documentXml) {
278
418
  }
279
419
  return scan(root);
280
420
  }
281
- function evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip, originalBookmarkDiagnostics, revisedBookmarkDiagnostics, candidateXml) {
421
+ function evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip, originalBookmarkDiagnostics, revisedBookmarkDiagnostics, candidateXml, auxiliarySidecars) {
282
422
  const acceptedXml = acceptAllChanges(candidateXml);
283
423
  const rejectedXml = rejectAllChanges(candidateXml);
284
424
  const acceptedText = extractTextWithParagraphs(acceptedXml);
@@ -289,11 +429,28 @@ function evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip,
289
429
  const rejectTextComparison = compareTexts(originalTextForRoundTrip, rejectedText);
290
430
  const acceptBookmarksOk = bookmarkDiagnosticsSemanticallyEqual(revisedBookmarkDiagnostics, acceptedBookmarkDiagnostics);
291
431
  const rejectBookmarksOk = bookmarkDiagnosticsSemanticallyEqual(originalBookmarkDiagnostics, rejectedBookmarkDiagnostics);
292
- // Validate field structure: after accept-all and reject-all, every
293
- // w:instrText must be inside a proper field sequence (between fldChar
294
- // begin and fldChar separate). Orphaned instrText renders as visible
295
- // text in Word.
296
- const fieldStructureOk = validateFieldStructure(acceptedXml) && validateFieldStructure(rejectedXml);
432
+ // Validate field structure per-story. Each footnote/endnote entry is its own
433
+ // ECMA-376 story; a complex field that crosses a story boundary breaks
434
+ // Word's field state machine even when global begin/end counts balance.
435
+ // Sidecars from BOTH archives are validated because Step 12's auxiliary-part
436
+ // merge picks its base and source archives by reconstruction mode (inplace
437
+ // base = revised; rebuild base = original) and validating only one side
438
+ // would miss field issues that would still ship in the merged result.
439
+ // `acceptAllChanges` / `rejectAllChanges` only transform document.xml, so
440
+ // the sidecar set is identical for both transforms.
441
+ const acceptedStories = splitStories(acceptedXml, auxiliarySidecars.footnotesXmls, auxiliarySidecars.endnotesXmls);
442
+ const rejectedStories = splitStories(rejectedXml, auxiliarySidecars.footnotesXmls, auxiliarySidecars.endnotesXmls);
443
+ // Issue #217 conformance gate on the COMBINED output: w:fldChar MUST NOT
444
+ // appear inside <w:del>. ECMA-376 Part 4 § 17.16.5 makes this fatal for
445
+ // Word's field state machine. The full validateFieldStructure check is run
446
+ // on the accept/reject projections (per-story); on the combined view we
447
+ // only gate the strict no-fldChar-in-del rule because some legacy emit
448
+ // paths (e.g. delInstrText inside <w:moveFrom>) are non-conformant in shape
449
+ // but out of scope for #217.
450
+ const combinedNoFldCharInDel = !hasFldCharInsideDel(candidateXml);
451
+ const fieldStructureOk = combinedNoFldCharInDel &&
452
+ validateFieldStructure(acceptedStories) &&
453
+ validateFieldStructure(rejectedStories);
297
454
  const checks = {
298
455
  acceptText: acceptTextComparison.normalizedIdentical,
299
456
  rejectText: rejectTextComparison.normalizedIdentical,
@@ -375,6 +532,22 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
375
532
  // Extract numbering.xml if available
376
533
  const originalNumberingXml = await originalArchive.getNumberingXml() ?? undefined;
377
534
  const revisedNumberingXml = await revisedArchive.getNumberingXml() ?? undefined;
535
+ // Extract footnote/endnote sidecars from BOTH archives for per-story
536
+ // field-closure validation (issue #212). Step 12 picks the base archive by
537
+ // reconstruction mode (inplace = revised, rebuild = original) and merges
538
+ // missing referenced entries from the opposite archive. Validating both
539
+ // archives' sidecars covers the union of entries that could ship without
540
+ // having to duplicate the merge logic at safety-check time.
541
+ const [originalFootnotesXml, originalEndnotesXml, revisedFootnotesXml, revisedEndnotesXml,] = await Promise.all([
542
+ originalArchive.getFile('word/footnotes.xml'),
543
+ originalArchive.getFile('word/endnotes.xml'),
544
+ revisedArchive.getFile('word/footnotes.xml'),
545
+ revisedArchive.getFile('word/endnotes.xml'),
546
+ ]);
547
+ const auxiliarySidecars = {
548
+ footnotesXmls: [originalFootnotesXml, revisedFootnotesXml],
549
+ endnotesXmls: [originalEndnotesXml, revisedEndnotesXml],
550
+ };
378
551
  const originalPart = {
379
552
  uri: 'word/document.xml',
380
553
  contentType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml',
@@ -383,8 +556,13 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
383
556
  uri: 'word/document.xml',
384
557
  contentType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml',
385
558
  };
386
- const originalTextForRoundTrip = extractTextWithParagraphs(originalXml);
387
- const revisedTextForRoundTrip = extractTextWithParagraphs(revisedXml);
559
+ // Project each input through the SAME accept/reject operation the candidate is
560
+ // checked under, so the round-trip comparison is like-for-like even when an
561
+ // input already carries its own tracked changes (pre-tracked w:ins / w:del,
562
+ // comment anchors, multi-author stacks). For a clean input these equal the raw
563
+ // extraction, so behavior on the common case is unchanged. (#347)
564
+ const originalTextForRoundTrip = extractTextWithParagraphs(rejectAllChanges(originalXml));
565
+ const revisedTextForRoundTrip = extractTextWithParagraphs(acceptAllChanges(revisedXml));
388
566
  const originalBookmarkDiagnostics = collectBookmarkDiagnostics(originalXml);
389
567
  const revisedBookmarkDiagnostics = collectBookmarkDiagnostics(revisedXml);
390
568
  const runComparisonPass = (atomizeOptions, outputMode) => {
@@ -445,7 +623,7 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
445
623
  }
446
624
  return { mergedAtoms, newDocumentXml, outputMode };
447
625
  };
448
- const evaluateRoundTripSafety = (candidateXml) => evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip, originalBookmarkDiagnostics, revisedBookmarkDiagnostics, candidateXml);
626
+ const evaluateRoundTripSafety = (candidateXml) => evaluateSafetyChecks(originalTextForRoundTrip, revisedTextForRoundTrip, originalBookmarkDiagnostics, revisedBookmarkDiagnostics, candidateXml, auxiliarySidecars);
449
627
  let comparisonResult;
450
628
  let fallbackReason;
451
629
  let fallbackDiagnostics;
@@ -530,7 +708,7 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
530
708
  comparisonResult = selected;
531
709
  }
532
710
  else {
533
- comparisonResult = runComparisonPass(undefined, 'rebuild');
711
+ comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
534
712
  fallbackReason = 'round_trip_safety_check_failed';
535
713
  fallbackDiagnostics = {
536
714
  attempts: failedAttempts,
@@ -538,30 +716,33 @@ export async function compareDocumentsAtomizer(original, revised, options = {})
538
716
  }
539
717
  }
540
718
  else {
541
- comparisonResult = runComparisonPass(undefined, 'rebuild');
719
+ comparisonResult = runComparisonPass({ atomizeParagraphLevelMarkers: true }, 'rebuild');
542
720
  }
543
721
  const { mergedAtoms, newDocumentXml } = comparisonResult;
544
722
  // Step 12: Clone appropriate archive and update document.xml.
545
723
  // Use the revised archive only for true inplace output.
546
724
  const baseArchive = comparisonResult.outputMode === 'inplace' ? revisedArchive : originalArchive;
725
+ // The merge source is the *opposite* archive from the base: inplace pulls
726
+ // deleted-but-still-referenced definitions from the original, rebuild pulls
727
+ // added-but-still-referenced definitions from the revised. Without this,
728
+ // rebuild output ships dangling references when the original lacks an
729
+ // auxiliary part that the revised side introduced (issue #94).
730
+ const mergeSourceArchive = comparisonResult.outputMode === 'inplace' ? originalArchive : revisedArchive;
547
731
  const resultArchive = await baseArchive.clone();
548
732
  resultArchive.setDocumentXml(newDocumentXml);
549
- // Step 12b: For inplace mode, merge auxiliary part definitions (footnotes,
550
- // endnotes, comments) from the original document. Inplace reconstruction
551
- // inserts deleted content that may reference definitions not present in the
552
- // revised archive.
553
- if (comparisonResult.outputMode === 'inplace') {
554
- const mergeResults = new Map();
555
- for (const descriptor of AUXILIARY_PARTS) {
556
- const result = await mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, newDocumentXml, descriptor);
557
- if (result.mergedIds.size > 0) {
558
- mergeResults.set(descriptor.label, result);
559
- }
560
- }
561
- // Post-merge hook for comment ancillary parts
562
- if (mergeResults.has('comment')) {
563
- await mergeCommentAncillaryParts(originalArchive, resultArchive, mergeResults.get('comment'));
564
- }
733
+ // Step 12b: Merge auxiliary part definitions (footnotes, endnotes, comments).
734
+ // Reconstruction may insert content (deleted in inplace, added in rebuild)
735
+ // whose definitions are missing from the base archive.
736
+ for (const descriptor of AUXILIARY_PARTS) {
737
+ await mergeAuxiliaryPartDefinitions(mergeSourceArchive, resultArchive, newDocumentXml, descriptor);
738
+ }
739
+ // Comment-specific post-pass: walk reply threads via commentsExtended.xml.
740
+ // Gated on root comment IDs in the *result* document (not on what the
741
+ // generic merge appended), so the pass runs even when the original already
742
+ // contains the root and revised only adds replies under it (issue #108).
743
+ const rootCommentIds = collectReferenceIds(newDocumentXml, 'w:commentReference');
744
+ if (rootCommentIds.size > 0) {
745
+ await mergeCommentAncillaryParts(mergeSourceArchive, resultArchive, rootCommentIds);
565
746
  }
566
747
  // Step 13: Save result and compute stats
567
748
  const resultBuffer = await resultArchive.save();
@@ -634,28 +815,29 @@ function parseEntries(xml, entryTag) {
634
815
  }
635
816
  return { doc, entries };
636
817
  }
637
- const serializer = new XMLSerializer();
638
818
  /**
639
819
  * Merge auxiliary part definitions (footnotes, endnotes, comments) from the
640
- * original archive into the result archive. When inplace mode inserts deleted
641
- * content, the corresponding definitions must exist in the auxiliary part.
820
+ * source archive into the result archive. The source archive is whichever
821
+ * side reconstruction may have introduced references to: original in inplace
822
+ * mode (deleted-but-referenced definitions), revised in rebuild mode
823
+ * (added-but-referenced definitions).
642
824
  */
643
- async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, documentXml, descriptor) {
825
+ async function mergeAuxiliaryPartDefinitions(sourceArchive, resultArchive, documentXml, descriptor) {
644
826
  const result = { mergedIds: new Set(), createdPart: false };
645
827
  const referencedIds = collectReferenceIds(documentXml, descriptor.referenceTag);
646
828
  if (referencedIds.size === 0)
647
829
  return result;
648
- const originalPartXml = await originalArchive.getFile(descriptor.partPath);
649
- if (!originalPartXml)
830
+ const sourcePartXml = await sourceArchive.getFile(descriptor.partPath);
831
+ if (!sourcePartXml)
650
832
  return result;
651
833
  const resultPartXml = await resultArchive.getFile(descriptor.partPath);
652
- const originalParsed = parseEntries(originalPartXml, descriptor.entryTag);
834
+ const sourceParsed = parseEntries(sourcePartXml, descriptor.entryTag);
653
835
  const resultParsed = resultPartXml ? parseEntries(resultPartXml, descriptor.entryTag) : null;
654
836
  // Find missing entries: referenced in document.xml but not in result
655
837
  const missingElements = [];
656
838
  for (const id of referencedIds) {
657
- if (!(resultParsed?.entries.has(id)) && originalParsed.entries.has(id)) {
658
- missingElements.push(originalParsed.entries.get(id));
839
+ if (!(resultParsed?.entries.has(id)) && sourceParsed.entries.has(id)) {
840
+ missingElements.push(sourceParsed.entries.get(id));
659
841
  result.mergedIds.add(id);
660
842
  }
661
843
  }
@@ -673,27 +855,33 @@ async function mergeAuxiliaryPartDefinitions(originalArchive, resultArchive, doc
673
855
  }
674
856
  }
675
857
  else {
676
- // Create part from scratch: clone root from original, insert missing entries
677
- const newDoc = parseXml(originalPartXml);
858
+ // Create part from scratch: clone root from merge source, drop every
859
+ // non-reserved entry, then append the missing referenced ones.
860
+ // Reserved entries are footnote/endnote separators identified by
861
+ // w:type="separator" / w:type="continuationSeparator" — Word expects
862
+ // them to exist and they don't carry user content. Filtering by w:type
863
+ // (not by magic w:id values) keeps this robust across authoring tools.
864
+ const newDoc = parseXml(sourcePartXml);
678
865
  const rootEl = newDoc.getElementsByTagName(descriptor.rootTag)[0];
679
866
  if (rootEl) {
680
- // Remove all existing entries — we only want the missing ones
681
867
  const existingEntries = rootEl.getElementsByTagName(descriptor.entryTag);
682
868
  const toRemove = [];
683
869
  for (let i = 0; i < existingEntries.length; i++) {
684
- toRemove.push(existingEntries[i]);
870
+ const el = existingEntries[i];
871
+ const type = el.getAttribute('w:type');
872
+ if (type !== 'separator' && type !== 'continuationSeparator') {
873
+ toRemove.push(el);
874
+ }
685
875
  }
686
876
  for (const el of toRemove) {
687
877
  rootEl.removeChild(el);
688
878
  }
689
- // Add back only the missing entries
690
879
  for (const el of missingElements) {
691
880
  const imported = newDoc.importNode(el, true);
692
881
  rootEl.appendChild(imported);
693
882
  }
694
883
  resultArchive.setFile(descriptor.partPath, serializer.serializeToString(newDoc));
695
884
  result.createdPart = true;
696
- // Bootstrap OPC metadata for the newly created part
697
885
  await ensureOpcMetadata(resultArchive, descriptor);
698
886
  }
699
887
  }
@@ -765,52 +953,154 @@ async function ensureOpcMetadata(archive, descriptor) {
765
953
  // Comment Ancillary Parts Merging
766
954
  // =============================================================================
767
955
  /**
768
- * After merging comment definitions, copy related entries from
769
- * commentsExtended.xml and people.xml for author fidelity and reply threading.
956
+ * Walk the comment reply graph from each root referenced in the result
957
+ * document, merging reply <w:comment> entries, their commentsExtended.xml
958
+ * threading entries, and people.xml authors. Replies have no
959
+ * <w:commentReference> in document.xml — they're discoverable only via
960
+ * w15:paraIdParent in commentsExtended.xml. Without this expansion, rebuild
961
+ * mode silently drops reply threads (issue #108).
770
962
  */
771
- async function mergeCommentAncillaryParts(originalArchive, resultArchive, commentMergeResult) {
772
- // Collect authors and paraIds from the merged comment entries
773
- const originalCommentsXml = await originalArchive.getFile('word/comments.xml');
774
- if (!originalCommentsXml)
963
+ async function mergeCommentAncillaryParts(sourceArchive, resultArchive, rootCommentIds) {
964
+ const sourceCommentsXml = await sourceArchive.getFile('word/comments.xml');
965
+ if (!sourceCommentsXml)
775
966
  return;
776
- const origDoc = parseXml(originalCommentsXml);
777
- const mergedAuthors = new Set();
778
- const mergedParaIds = new Set();
779
- const commentEls = origDoc.getElementsByTagName('w:comment');
780
- for (let i = 0; i < commentEls.length; i++) {
781
- const el = commentEls[i];
967
+ const sourceDoc = parseXml(sourceCommentsXml);
968
+ // Build full source comment maps. Canonical paraId is the first <w:p>
969
+ // child's w14:paraId, matching getCommentElParaId() in primitives/comments.ts.
970
+ const commentById = new Map();
971
+ const paraIdByCommentId = new Map();
972
+ const commentIdByParaId = new Map();
973
+ const authorByCommentId = new Map();
974
+ const allCommentEls = sourceDoc.getElementsByTagName('w:comment');
975
+ for (let i = 0; i < allCommentEls.length; i++) {
976
+ const el = allCommentEls[i];
782
977
  const id = el.getAttribute('w:id');
783
- if (!id || !commentMergeResult.mergedIds.has(id))
978
+ if (!id)
784
979
  continue;
980
+ commentById.set(id, el);
785
981
  const author = el.getAttribute('w:author');
786
982
  if (author)
787
- mergedAuthors.add(author);
788
- // Collect paraIds from <w:p> children inside the comment
789
- const paras = el.getElementsByTagName('w:p');
790
- for (let j = 0; j < paras.length; j++) {
791
- const p = paras[j];
792
- const paraId = p.getAttribute('w14:paraId');
793
- if (paraId)
794
- mergedParaIds.add(paraId);
795
- }
796
- }
797
- // Merge commentsExtended.xml entries matching merged paraIds
798
- await mergeCommentsExtended(originalArchive, resultArchive, mergedParaIds);
799
- // Merge people.xml entries matching merged authors
800
- await mergePeople(originalArchive, resultArchive, mergedAuthors);
983
+ authorByCommentId.set(id, author);
984
+ const firstP = el.getElementsByTagName('w:p')[0];
985
+ const paraId = firstP?.getAttribute('w14:paraId');
986
+ if (paraId) {
987
+ paraIdByCommentId.set(id, paraId);
988
+ commentIdByParaId.set(paraId, id);
989
+ }
990
+ }
991
+ // Seed inclusion sets from the root IDs that appear in the result document.
992
+ const includedCommentIds = new Set();
993
+ const includedParaIds = new Set();
994
+ const includedAuthors = new Set();
995
+ for (const id of rootCommentIds) {
996
+ if (!commentById.has(id))
997
+ continue;
998
+ includedCommentIds.add(id);
999
+ const pid = paraIdByCommentId.get(id);
1000
+ if (pid)
1001
+ includedParaIds.add(pid);
1002
+ const author = authorByCommentId.get(id);
1003
+ if (author)
1004
+ includedAuthors.add(author);
1005
+ }
1006
+ // BFS over commentsExtended.xml's paraIdParent graph from each included
1007
+ // root paraId. Skip entries that don't resolve to a real source comment so
1008
+ // we never pull in dangling commentEx/people without a backing definition.
1009
+ const sourceExtendedXml = await sourceArchive.getFile('word/commentsExtended.xml');
1010
+ if (sourceExtendedXml) {
1011
+ const exDoc = parseXml(sourceExtendedXml);
1012
+ const exEls = exDoc.getElementsByTagName('w15:commentEx');
1013
+ const childrenOf = new Map();
1014
+ for (let i = 0; i < exEls.length; i++) {
1015
+ const ex = exEls[i];
1016
+ const childPid = ex.getAttribute('w15:paraId');
1017
+ const parentPid = ex.getAttribute('w15:paraIdParent');
1018
+ if (!childPid || !parentPid)
1019
+ continue;
1020
+ const arr = childrenOf.get(parentPid);
1021
+ if (arr)
1022
+ arr.push(childPid);
1023
+ else
1024
+ childrenOf.set(parentPid, [childPid]);
1025
+ }
1026
+ const queue = [...includedParaIds];
1027
+ while (queue.length > 0) {
1028
+ const pid = queue.shift();
1029
+ const children = childrenOf.get(pid);
1030
+ if (!children)
1031
+ continue;
1032
+ for (const childPid of children) {
1033
+ if (includedParaIds.has(childPid))
1034
+ continue;
1035
+ const childCommentId = commentIdByParaId.get(childPid);
1036
+ if (!childCommentId)
1037
+ continue;
1038
+ includedParaIds.add(childPid);
1039
+ includedCommentIds.add(childCommentId);
1040
+ const author = authorByCommentId.get(childCommentId);
1041
+ if (author)
1042
+ includedAuthors.add(author);
1043
+ queue.push(childPid);
1044
+ }
1045
+ }
1046
+ }
1047
+ // Append any reply <w:comment> definitions still missing from result.
1048
+ // The generic merge already added roots when needed; we add the replies
1049
+ // (and any roots not yet present in the result, defensively).
1050
+ await mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds);
1051
+ // Merge commentsExtended and people for the expanded set.
1052
+ await mergeCommentsExtended(sourceArchive, resultArchive, includedParaIds);
1053
+ await mergePeople(sourceArchive, resultArchive, includedAuthors);
1054
+ }
1055
/**
 * Make sure result/word/comments.xml defines every comment listed in
 * `includedCommentIds`, copying absent definitions over from the source.
 *
 * @param {object} resultArchive - Archive read via getFile() and written via setFile().
 * @param {Map<string, Element>} commentById - Source <w:comment> elements keyed by w:id.
 * @param {Set<string>} includedCommentIds - Ids that must be defined in the result.
 * @returns {Promise<void>} The part is rewritten only if something was appended.
 */
async function mergeMissingCommentDefinitions(resultArchive, commentById, includedCommentIds) {
    if (includedCommentIds.size === 0) {
        return;
    }
    const currentXml = await resultArchive.getFile('word/comments.xml');
    if (!currentXml) {
        // No comments part to append to. The generic auxiliary-part merge is
        // expected to have created it for any included root already.
        return;
    }
    const doc = parseXml(currentXml);
    const commentsRoot = doc.documentElement;

    // Snapshot the ids already defined before anything is appended.
    const presentIds = new Set();
    const presentEls = commentsRoot.getElementsByTagName('w:comment');
    for (let idx = 0; idx < presentEls.length; idx += 1) {
        const presentId = presentEls[idx].getAttribute('w:id');
        if (presentId) {
            presentIds.add(presentId);
        }
    }

    let dirty = false;
    for (const wantedId of includedCommentIds) {
        // Only consult the source map for ids the result does not define yet.
        const definition = presentIds.has(wantedId) ? undefined : commentById.get(wantedId);
        if (definition) {
            // importNode: the definition belongs to the source document.
            commentsRoot.appendChild(doc.importNode(definition, true));
            dirty = true;
        }
    }

    if (dirty) {
        resultArchive.setFile('word/comments.xml', serializer.serializeToString(doc));
    }
}
802
- async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaIds) {
1092
+ async function mergeCommentsExtended(sourceArchive, resultArchive, mergedParaIds) {
803
1093
  if (mergedParaIds.size === 0)
804
1094
  return;
805
- const originalXml = await originalArchive.getFile('word/commentsExtended.xml');
806
- if (!originalXml)
1095
+ const sourceXml = await sourceArchive.getFile('word/commentsExtended.xml');
1096
+ if (!sourceXml)
807
1097
  return;
808
- const origDoc = parseXml(originalXml);
809
- const origEntries = origDoc.getElementsByTagName('w15:commentEx');
1098
+ const sourceDoc = parseXml(sourceXml);
1099
+ const sourceEntries = sourceDoc.getElementsByTagName('w15:commentEx');
810
1100
  // Collect entries whose paraId matches a merged comment's paragraph
811
1101
  const entriesToMerge = [];
812
- for (let i = 0; i < origEntries.length; i++) {
813
- const el = origEntries[i];
1102
+ for (let i = 0; i < sourceEntries.length; i++) {
1103
+ const el = sourceEntries[i];
814
1104
  const paraId = el.getAttribute('w15:paraId');
815
1105
  if (paraId && mergedParaIds.has(paraId)) {
816
1106
  entriesToMerge.push(el);
@@ -818,11 +1108,10 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
818
1108
  }
819
1109
  if (entriesToMerge.length === 0)
820
1110
  return;
821
- let resultXml = await resultArchive.getFile('word/commentsExtended.xml');
1111
+ const resultXml = await resultArchive.getFile('word/commentsExtended.xml');
822
1112
  if (resultXml) {
823
1113
  const resultDoc = parseXml(resultXml);
824
1114
  const rootEl = resultDoc.documentElement;
825
- // Check existing paraIds to avoid duplicates
826
1115
  const existingParaIds = new Set();
827
1116
  const existing = rootEl.getElementsByTagName('w15:commentEx');
828
1117
  for (let i = 0; i < existing.length; i++) {
@@ -837,21 +1126,55 @@ async function mergeCommentsExtended(originalArchive, resultArchive, mergedParaI
837
1126
  }
838
1127
  }
839
1128
  resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(resultDoc));
1129
+ return;
840
1130
  }
841
- // If commentsExtended.xml doesn't exist in result, we don't create it —
842
- // the file is optional and its absence won't cause crashes.
1131
+ // Bootstrap: result lacks commentsExtended.xml but the merged comments
1132
+ // depend on it for reply threading / done state. Clone the source's root
1133
+ // (preserves namespaces), drop non-matching entries, then add OPC metadata.
1134
+ const newDoc = parseXml(sourceXml);
1135
+ const newRoot = newDoc.documentElement;
1136
+ const allEntries = newRoot.getElementsByTagName('w15:commentEx');
1137
+ const toRemove = [];
1138
+ for (let i = 0; i < allEntries.length; i++) {
1139
+ const el = allEntries[i];
1140
+ const paraId = el.getAttribute('w15:paraId');
1141
+ if (!paraId || !mergedParaIds.has(paraId))
1142
+ toRemove.push(el);
1143
+ }
1144
+ for (const el of toRemove)
1145
+ newRoot.removeChild(el);
1146
+ resultArchive.setFile('word/commentsExtended.xml', serializer.serializeToString(newDoc));
1147
+ await ensureOpcMetadata(resultArchive, COMMENTS_EXTENDED_DESCRIPTOR);
843
1148
  }
844
- async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
1149
/**
 * Part descriptor for word/commentsExtended.xml. Passed to ensureOpcMetadata()
 * when that part has to be created in the result archive. Frozen because it is
 * a shared module-level constant.
 * `referenceTag` is empty; presumably no document.xml element references this
 * part directly - confirm against ensureOpcMetadata().
 */
const COMMENTS_EXTENDED_DESCRIPTOR = Object.freeze({
    label: 'commentsExtended',
    partPath: 'word/commentsExtended.xml',
    referenceTag: '',
    entryTag: 'w15:commentEx',
    rootTag: 'w15:commentsEx',
    contentType: 'application/vnd.ms-word.commentsExtended+xml',
    relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/commentsExtended',
});
/**
 * Part descriptor for word/people.xml. Passed to ensureOpcMetadata() when that
 * part has to be created in the result archive. Frozen because it is a shared
 * module-level constant.
 * `referenceTag` is empty; presumably no document.xml element references this
 * part directly - confirm against ensureOpcMetadata().
 */
const PEOPLE_DESCRIPTOR = Object.freeze({
    label: 'people',
    partPath: 'word/people.xml',
    referenceTag: '',
    entryTag: 'w15:person',
    rootTag: 'w15:people',
    contentType: 'application/vnd.ms-word.people+xml',
    relationshipType: 'http://schemas.microsoft.com/office/2011/relationships/people',
});
1167
+ async function mergePeople(sourceArchive, resultArchive, mergedAuthors) {
845
1168
  if (mergedAuthors.size === 0)
846
1169
  return;
847
- const originalXml = await originalArchive.getFile('word/people.xml');
848
- if (!originalXml)
1170
+ const sourceXml = await sourceArchive.getFile('word/people.xml');
1171
+ if (!sourceXml)
849
1172
  return;
850
- const origDoc = parseXml(originalXml);
851
- const origPersons = origDoc.getElementsByTagName('w15:person');
1173
+ const sourceDoc = parseXml(sourceXml);
1174
+ const sourcePersons = sourceDoc.getElementsByTagName('w15:person');
852
1175
  const personsToMerge = [];
853
- for (let i = 0; i < origPersons.length; i++) {
854
- const el = origPersons[i];
1176
+ for (let i = 0; i < sourcePersons.length; i++) {
1177
+ const el = sourcePersons[i];
855
1178
  const author = el.getAttribute('w15:author');
856
1179
  if (author && mergedAuthors.has(author)) {
857
1180
  personsToMerge.push(el);
@@ -859,11 +1182,10 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
859
1182
  }
860
1183
  if (personsToMerge.length === 0)
861
1184
  return;
862
- let resultXml = await resultArchive.getFile('word/people.xml');
1185
+ const resultXml = await resultArchive.getFile('word/people.xml');
863
1186
  if (resultXml) {
864
1187
  const resultDoc = parseXml(resultXml);
865
1188
  const rootEl = resultDoc.documentElement;
866
- // Check existing authors to avoid duplicates
867
1189
  const existingAuthors = new Set();
868
1190
  const existing = rootEl.getElementsByTagName('w15:person');
869
1191
  for (let i = 0; i < existing.length; i++) {
@@ -878,9 +1200,24 @@ async function mergePeople(originalArchive, resultArchive, mergedAuthors) {
878
1200
  }
879
1201
  }
880
1202
  resultArchive.setFile('word/people.xml', serializer.serializeToString(resultDoc));
1203
+ return;
1204
+ }
1205
+ // Bootstrap: result lacks people.xml. Clone source root (preserves
1206
+ // namespaces), remove non-matching authors, then add OPC metadata.
1207
+ const newDoc = parseXml(sourceXml);
1208
+ const newRoot = newDoc.documentElement;
1209
+ const allPersons = newRoot.getElementsByTagName('w15:person');
1210
+ const toRemove = [];
1211
+ for (let i = 0; i < allPersons.length; i++) {
1212
+ const el = allPersons[i];
1213
+ const author = el.getAttribute('w15:author');
1214
+ if (!author || !mergedAuthors.has(author))
1215
+ toRemove.push(el);
881
1216
  }
882
- // If people.xml doesn't exist in result, we don't create it —
883
- // the file is optional and its absence won't cause crashes.
1217
+ for (const el of toRemove)
1218
+ newRoot.removeChild(el);
1219
+ resultArchive.setFile('word/people.xml', serializer.serializeToString(newDoc));
1220
+ await ensureOpcMetadata(resultArchive, PEOPLE_DESCRIPTOR);
884
1221
  }
885
1222
  /**
886
1223
  * Compute comparison statistics from merged atoms.