@beyondwork/docx-react-component 1.0.58 → 1.0.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/README.md +2 -2
  2. package/package.json +2 -1
  3. package/src/api/awareness-identity-types.ts +4 -2
  4. package/src/api/comment-negotiation-types.ts +4 -1
  5. package/src/api/external-custody-types.ts +16 -0
  6. package/src/api/internal/build-ref-projections.ts +108 -0
  7. package/src/api/package-version.ts +1 -1
  8. package/src/api/participants-types.ts +11 -1
  9. package/src/api/public-types.ts +978 -10
  10. package/src/api/scope-metadata-resolver-types.ts +6 -0
  11. package/src/compare/diff-engine.ts +3 -0
  12. package/src/core/commands/formatting-commands.ts +1 -0
  13. package/src/core/commands/index.ts +225 -16
  14. package/src/core/commands/legacy-form-field-commands.ts +181 -0
  15. package/src/core/commands/table-structure-commands.ts +149 -31
  16. package/src/core/selection/mapping.ts +20 -0
  17. package/src/core/state/editor-state.ts +2 -1
  18. package/src/index.ts +28 -0
  19. package/src/io/docx-session.ts +22 -3
  20. package/src/io/export/export-session.ts +11 -7
  21. package/src/io/export/ooxml-namespaces.ts +47 -0
  22. package/src/io/export/reattach-preserved-parts.ts +4 -16
  23. package/src/io/export/serialize-comments.ts +3 -131
  24. package/src/io/export/serialize-ffdata.ts +89 -0
  25. package/src/io/export/serialize-headers-footers.ts +5 -0
  26. package/src/io/export/serialize-main-document.ts +224 -34
  27. package/src/io/export/serialize-numbering.ts +22 -2
  28. package/src/io/export/serialize-revisions.ts +99 -0
  29. package/src/io/export/serialize-tables.ts +9 -0
  30. package/src/io/export/split-review-boundaries.ts +1 -0
  31. package/src/io/export/table-properties-xml.ts +14 -0
  32. package/src/io/load-scheduler.ts +70 -28
  33. package/src/io/normalize/normalize-text.ts +13 -0
  34. package/src/io/ooxml/_mini-xml.ts +198 -0
  35. package/src/io/ooxml/canonicalize-payload.ts +1 -4
  36. package/src/io/ooxml/chart/chart-style-table.ts +4 -3
  37. package/src/io/ooxml/chart/parse-chart-space.ts +2 -4
  38. package/src/io/ooxml/chart/parse-series.ts +2 -1
  39. package/src/io/ooxml/chart/resolve-color.ts +2 -2
  40. package/src/io/ooxml/chart/types.ts +6 -434
  41. package/src/io/ooxml/comment-presentation-payload.ts +6 -5
  42. package/src/io/ooxml/highlight-colors.ts +8 -5
  43. package/src/io/ooxml/parse-anchor.ts +68 -53
  44. package/src/io/ooxml/parse-comments.ts +14 -142
  45. package/src/io/ooxml/parse-complex-content.ts +3 -106
  46. package/src/io/ooxml/parse-drawing.ts +100 -195
  47. package/src/io/ooxml/parse-ffdata.ts +93 -0
  48. package/src/io/ooxml/parse-fields.ts +7 -146
  49. package/src/io/ooxml/parse-fill.ts +88 -8
  50. package/src/io/ooxml/parse-font-table.ts +5 -105
  51. package/src/io/ooxml/parse-footnotes.ts +28 -152
  52. package/src/io/ooxml/parse-headers-footers.ts +106 -212
  53. package/src/io/ooxml/parse-inline-media.ts +3 -200
  54. package/src/io/ooxml/parse-main-document.ts +180 -217
  55. package/src/io/ooxml/parse-numbering.ts +154 -335
  56. package/src/io/ooxml/parse-object.ts +147 -0
  57. package/src/io/ooxml/parse-ole-relationship.ts +82 -0
  58. package/src/io/ooxml/parse-paragraph-formatting.ts +7 -10
  59. package/src/io/ooxml/parse-picture-sdt.ts +85 -0
  60. package/src/io/ooxml/parse-picture.ts +72 -42
  61. package/src/io/ooxml/parse-revisions.ts +285 -51
  62. package/src/io/ooxml/parse-settings.ts +6 -99
  63. package/src/io/ooxml/parse-shapes.ts +25 -140
  64. package/src/io/ooxml/parse-styles.ts +3 -218
  65. package/src/io/ooxml/parse-tables.ts +76 -256
  66. package/src/io/ooxml/parse-theme.ts +1 -4
  67. package/src/io/ooxml/property-grab-bag.ts +5 -47
  68. package/src/io/ooxml/xml-element-serialize.ts +32 -0
  69. package/src/io/ooxml/xml-parser.ts +183 -0
  70. package/src/legal/bookmarks.ts +1 -1
  71. package/src/legal/cross-references.ts +1 -1
  72. package/src/legal/defined-terms.ts +1 -1
  73. package/src/legal/{_document-root.ts → document-root.ts} +8 -0
  74. package/src/legal/signature-blocks.ts +1 -1
  75. package/src/model/canonical-document.ts +159 -6
  76. package/src/model/chart-types.ts +439 -0
  77. package/src/model/snapshot.ts +3 -1
  78. package/src/review/store/comment-remapping.ts +24 -11
  79. package/src/review/store/revision-actions.ts +482 -2
  80. package/src/review/store/revision-store.ts +15 -0
  81. package/src/review/store/revision-types.ts +76 -0
  82. package/src/runtime/collab/remote-cursor-awareness.ts +24 -0
  83. package/src/runtime/collab/runtime-collab-sync.ts +33 -0
  84. package/src/runtime/diagnostics/build-diagnostic.ts +151 -0
  85. package/src/runtime/diagnostics/code-metadata-table.ts +221 -0
  86. package/src/runtime/document-runtime.ts +476 -34
  87. package/src/runtime/document-search.ts +115 -0
  88. package/src/runtime/edit-ops/index.ts +18 -2
  89. package/src/runtime/footnote-resolver.ts +130 -0
  90. package/src/runtime/layout/layout-engine-instance.ts +31 -4
  91. package/src/runtime/layout/layout-engine-version.ts +37 -1
  92. package/src/runtime/layout/page-graph.ts +14 -1
  93. package/src/runtime/layout/resolved-formatting-state.ts +21 -0
  94. package/src/runtime/numbering-prefix.ts +17 -0
  95. package/src/runtime/query-scopes.ts +5 -8
  96. package/src/runtime/resolved-numbering-geometry.ts +37 -6
  97. package/src/runtime/revision-runtime.ts +27 -1
  98. package/src/runtime/selection/post-edit-validator.ts +60 -6
  99. package/src/runtime/structure-ops/index.ts +20 -4
  100. package/src/runtime/surface-projection.ts +290 -21
  101. package/src/runtime/table-schema.ts +6 -0
  102. package/src/runtime/theme-color-resolver.ts +2 -2
  103. package/src/runtime/units.ts +9 -0
  104. package/src/runtime/workflow-rail-segments.ts +4 -0
  105. package/src/ui/WordReviewEditor.tsx +187 -43
  106. package/src/ui/editor-runtime-boundary.ts +10 -0
  107. package/src/ui/editor-shell-view.tsx +4 -1
  108. package/src/ui/headless/chrome-registry.ts +53 -0
  109. package/src/ui/headless/selection-tool-resolver.ts +11 -1
  110. package/src/ui-tailwind/chrome/chrome-preset-model.ts +13 -0
  111. package/src/ui-tailwind/chrome/tw-command-palette-mount.tsx +96 -0
  112. package/src/ui-tailwind/chrome/tw-context-menu.tsx +2 -1
  113. package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +5 -4
  114. package/src/ui-tailwind/chrome/tw-mode-dock.tsx +6 -2
  115. package/src/ui-tailwind/chrome/use-container-breakpoint.ts +111 -0
  116. package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +0 -9
  117. package/src/ui-tailwind/chrome-overlay/tw-object-selection-overlay.tsx +1 -0
  118. package/src/ui-tailwind/chrome-overlay/tw-page-stack-overlay-layer.tsx +6 -7
  119. package/src/ui-tailwind/editor-surface/pm-schema.ts +87 -25
  120. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +9 -0
  121. package/src/ui-tailwind/editor-surface/shape-renderer.ts +76 -14
  122. package/src/ui-tailwind/editor-surface/tw-page-block-view.helpers.ts +18 -1
  123. package/src/ui-tailwind/editor-surface/tw-page-block-view.tsx +2 -0
  124. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +18 -2
  125. package/src/ui-tailwind/index.ts +9 -0
  126. package/src/ui-tailwind/page-chrome-model.ts +77 -5
  127. package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +56 -1
  128. package/src/ui-tailwind/page-stack/tw-region-block-renderer.tsx +2 -0
  129. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +116 -113
  130. package/src/ui-tailwind/review/tw-review-rail-footer.tsx +2 -2
  131. package/src/ui-tailwind/theme/tokens.ts +14 -0
  132. package/src/ui-tailwind/toolbar/tw-shell-header.tsx +5 -0
  133. package/src/ui-tailwind/tw-review-workspace.tsx +29 -87
  134. package/src/validation/diagnostics.ts +1 -0
@@ -12,8 +12,12 @@ import type {
12
12
  TableRowNode,
13
13
  TextMark,
14
14
  } from "../../model/canonical-document.ts";
15
+ import type { LegacyFormFieldNode } from "../../model/canonical-document.ts";
15
16
  import { resolveHighlightColor } from "./highlight-colors.ts";
17
+ import { parseFFDataFromFldChar } from "./parse-ffdata.ts";
16
18
  import { classifyFieldInstruction } from "./parse-fields.ts";
19
+ import { parseXmlWithOffsets as parseXml } from "./xml-parser.ts";
20
+ import { localName, readStringAttr } from "./xml-attr-helpers.ts";
17
21
  import {
18
22
  readCellBorders,
19
23
  readCellCnfStyle,
@@ -47,6 +51,9 @@ import {
47
51
  } from "./parse-tables.ts";
48
52
  import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
49
53
 
54
+ const TAB_ALIGN_VOCAB = new Set<TabStop["align"]>(["left", "center", "right", "decimal", "num", "bar", "clear"]);
55
+ const TAB_LEADER_VOCAB = new Set<TabStop["leader"]>(["none", "dot", "hyphen", "underscore", "heavy", "middleDot"]);
56
+
50
57
  // ---- Public types ----
51
58
 
52
59
  export interface ParsedHeaderFooterReference {
@@ -91,7 +98,8 @@ let currentSourceXml = "";
91
98
  export function parseHeaderFooterReferences(
92
99
  documentXml: string,
93
100
  ): ParsedHeaderFooterReference[] {
94
- const root = parseXml(documentXml);
101
+ currentSourceXml = documentXml;
102
+ const root = parseXml(documentXml) as XmlElementNode;
95
103
  const documentElement = findChildElementOptional(root, "document");
96
104
  if (!documentElement) {
97
105
  return [];
@@ -169,7 +177,7 @@ function extractSectPrRefs(
169
177
  if (name === "headerReference" || name === "footerReference") {
170
178
  const kind: "header" | "footer" = name === "headerReference" ? "header" : "footer";
171
179
  const rawType =
172
- child.attributes["w:type"] ?? child.attributes.type ?? "default";
180
+ readStringAttr(child, "w:type") ?? "default";
173
181
  const variant = toHeaderFooterVariant(rawType);
174
182
  const relationshipId =
175
183
  child.attributes["r:id"] ??
@@ -206,7 +214,16 @@ function parseHdrFtrXml(
206
214
  xml: string,
207
215
  rootLocalName: "hdr" | "ftr",
208
216
  ): ParsedHeaderFooterDocument {
209
- const root = parseXml(xml);
217
+ currentSourceXml = xml;
218
+ let root: XmlElementNode;
219
+ try {
220
+ root = parseXml(xml) as XmlElementNode;
221
+ } catch {
222
+ // Tolerate malformed header/footer XML — treat as empty rather than
223
+ // throwing into the docx-session load path. Matches the pre-dedup
224
+ // behaviour of the former private parser.
225
+ return { blocks: [] };
226
+ }
210
227
  const hdrFtrElement = findChildElementOptional(root, rootLocalName);
211
228
  if (!hdrFtrElement) {
212
229
  return { blocks: [] };
@@ -222,12 +239,12 @@ function parseHdrFtrXml(
222
239
  const name = localName(child.name);
223
240
 
224
241
  if (name === "p") {
225
- blocks.push(parseParagraphElement(child));
242
+ blocks.push(parseParagraphElement(child, xml));
226
243
  } else if (name === "tbl") {
227
244
  // Simple tables (no revisions, fields, or nested tables) are promoted
228
245
  // to supported-roundtrip; structurally risky tables stay opaque.
229
246
  if (isSimpleSecondaryStoryTable(child)) {
230
- blocks.push(parseSimpleTableElement(child));
247
+ blocks.push(parseSimpleTableElement(child, xml));
231
248
  } else {
232
249
  blocks.push({
233
250
  type: "opaque_block",
@@ -250,18 +267,14 @@ function parseHdrFtrXml(
250
267
  return { blocks };
251
268
  }
252
269
 
253
- function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
270
+ function parseParagraphElement(pElement: XmlElementNode, sourceXml: string): ParagraphNode {
254
271
  let styleId: string | undefined;
255
272
  let alignment: ParagraphNode["alignment"];
256
273
  let spacing: ParagraphNode["spacing"];
257
274
  let indentation: ParagraphNode["indentation"];
258
275
  let tabStops: ParagraphNode["tabStops"];
259
276
  const children: InlineNode[] = [];
260
- let activeComplexField: {
261
- instruction: string;
262
- children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
263
- mode: "instruction" | "result";
264
- } | null = null;
277
+ let activeComplexField: ActiveComplexField | null = null;
265
278
 
266
279
  for (const child of pElement.children) {
267
280
  if (child.type !== "element") {
@@ -282,14 +295,21 @@ function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
282
295
  indentation = readParagraphIndentation(child);
283
296
  tabStops = readParagraphTabStops(child);
284
297
  } else if (name === "r") {
285
- activeComplexField = appendRunNodes(child, children, activeComplexField);
298
+ activeComplexField = appendRunNodes(child, children, activeComplexField, sourceXml);
286
299
  } else if (name === "hyperlink") {
287
300
  children.push(parseHyperlinkElement(child));
288
301
  } else if (name === "bookmarkStart" || name === "bookmarkEnd") {
289
302
  children.push(parseBookmarkElement(child));
290
303
  } else if (name === "fldSimple") {
291
304
  if (activeComplexField && activeComplexField.instruction.trim().length > 0) {
292
- children.push(createFieldNode(activeComplexField.instruction, "complex", activeComplexField.children));
305
+ children.push(
306
+ createFieldNode(
307
+ activeComplexField.instruction,
308
+ "complex",
309
+ activeComplexField.children,
310
+ activeComplexField.legacyFormField,
311
+ ),
312
+ );
293
313
  activeComplexField = null;
294
314
  }
295
315
  pushFieldNode(children, child, "simple");
@@ -297,7 +317,14 @@ function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
297
317
  }
298
318
 
299
319
  if (activeComplexField && activeComplexField.instruction.trim().length > 0) {
300
- children.push(createFieldNode(activeComplexField.instruction, "complex", activeComplexField.children));
320
+ children.push(
321
+ createFieldNode(
322
+ activeComplexField.instruction,
323
+ "complex",
324
+ activeComplexField.children,
325
+ activeComplexField.legacyFormField,
326
+ ),
327
+ );
301
328
  }
302
329
 
303
330
  return {
@@ -311,19 +338,19 @@ function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
311
338
  };
312
339
  }
313
340
 
314
- function appendRunNodes(
315
- rElement: XmlElementNode,
316
- nodes: InlineNode[],
317
- activeComplexField: {
318
- instruction: string;
319
- children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
320
- mode: "instruction" | "result";
321
- } | null,
322
- ): {
341
+ interface ActiveComplexField {
323
342
  instruction: string;
324
343
  children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
325
344
  mode: "instruction" | "result";
326
- } | null {
345
+ legacyFormField?: LegacyFormFieldNode;
346
+ }
347
+
348
+ function appendRunNodes(
349
+ rElement: XmlElementNode,
350
+ nodes: InlineNode[],
351
+ activeComplexField: ActiveComplexField | null,
352
+ sourceXml: string,
353
+ ): ActiveComplexField | null {
327
354
  const marks: TextMark[] = parseRunProperties(rElement);
328
355
 
329
356
  for (const child of rElement.children) {
@@ -333,14 +360,29 @@ function appendRunNodes(
333
360
 
334
361
  const name = localName(child.name);
335
362
  if (name === "fldChar") {
336
- const fldType = child.attributes["w:fldCharType"] ?? child.attributes.fldCharType;
363
+ const fldType = readStringAttr(child, "w:fldCharType");
337
364
  if (fldType === "begin") {
338
- activeComplexField = { instruction: "", children: [], mode: "instruction" };
365
+ // Phase T: parseFFDataFromFldChar populates legacyFormField for legacy
366
+ // form fields (FORMTEXT/FORMCHECKBOX/FORMDROPDOWN) in headers/footers.
367
+ const legacyFormField = parseFFDataFromFldChar(child, sourceXml);
368
+ activeComplexField = {
369
+ instruction: "",
370
+ children: [],
371
+ mode: "instruction",
372
+ ...(legacyFormField ? { legacyFormField } : {}),
373
+ };
339
374
  } else if (fldType === "separate" && activeComplexField) {
340
375
  activeComplexField.mode = "result";
341
376
  } else if (fldType === "end" && activeComplexField) {
342
377
  if (activeComplexField.instruction.trim().length > 0) {
343
- nodes.push(createFieldNode(activeComplexField.instruction, "complex", activeComplexField.children));
378
+ nodes.push(
379
+ createFieldNode(
380
+ activeComplexField.instruction,
381
+ "complex",
382
+ activeComplexField.children,
383
+ activeComplexField.legacyFormField,
384
+ ),
385
+ );
344
386
  }
345
387
  activeComplexField = null;
346
388
  }
@@ -404,7 +446,7 @@ function parseRunElement(rElement: XmlElementNode): InlineNode[] {
404
446
  nodes.push({ type: "tab" });
405
447
  } else if (name === "footnoteReference") {
406
448
  const noteId =
407
- child.attributes["w:id"] ?? child.attributes.id ?? "";
449
+ readStringAttr(child, "w:id") ?? "";
408
450
  if (noteId) {
409
451
  const ref: FootnoteRefNode = {
410
452
  type: "footnote_ref",
@@ -415,7 +457,7 @@ function parseRunElement(rElement: XmlElementNode): InlineNode[] {
415
457
  }
416
458
  } else if (name === "endnoteReference") {
417
459
  const noteId =
418
- child.attributes["w:id"] ?? child.attributes.id ?? "";
460
+ readStringAttr(child, "w:id") ?? "";
419
461
  if (noteId) {
420
462
  const ref: FootnoteRefNode = {
421
463
  type: "footnote_ref",
@@ -495,7 +537,7 @@ function parseRunChildNode(
495
537
  }
496
538
  if (name === "footnoteReference") {
497
539
  const noteId =
498
- child.attributes["w:id"] ?? child.attributes.id ?? "";
540
+ readStringAttr(child, "w:id") ?? "";
499
541
  if (noteId) {
500
542
  const ref: FootnoteRefNode = {
501
543
  type: "footnote_ref",
@@ -508,7 +550,7 @@ function parseRunChildNode(
508
550
  }
509
551
  if (name === "endnoteReference") {
510
552
  const noteId =
511
- child.attributes["w:id"] ?? child.attributes.id ?? "";
553
+ readStringAttr(child, "w:id") ?? "";
512
554
  if (noteId) {
513
555
  const ref: FootnoteRefNode = {
514
556
  type: "footnote_ref",
@@ -584,12 +626,12 @@ function parseHyperlinkElement(element: XmlElementNode): Extract<InlineNode, { t
584
626
  function parseBookmarkElement(
585
627
  element: XmlElementNode,
586
628
  ): Extract<InlineNode, { type: "bookmark_start" | "bookmark_end" }> {
587
- const bookmarkId = element.attributes["w:id"] ?? element.attributes.id ?? "0";
629
+ const bookmarkId = readStringAttr(element, "w:id") ?? "0";
588
630
  if (localName(element.name) === "bookmarkStart") {
589
631
  return {
590
632
  type: "bookmark_start",
591
633
  bookmarkId,
592
- name: element.attributes["w:name"] ?? element.attributes.name ?? "",
634
+ name: readStringAttr(element, "w:name") ?? "",
593
635
  };
594
636
  }
595
637
 
@@ -618,6 +660,7 @@ function createFieldNode(
618
660
  instruction: string,
619
661
  fieldType: "simple" | "complex",
620
662
  children: InlineNode[] = [],
663
+ legacyFormField?: LegacyFormFieldNode,
621
664
  ): Extract<InlineNode, { type: "field" }> {
622
665
  const classification = classifyFieldInstruction(instruction);
623
666
  return {
@@ -628,13 +671,13 @@ function createFieldNode(
628
671
  fieldFamily: classification.family,
629
672
  ...(classification.target ? { fieldTarget: classification.target } : {}),
630
673
  refreshStatus: classification.supported ? "stale" : "preserve-only",
674
+ ...(legacyFormField ? { legacyFormField } : {}),
631
675
  };
632
676
  }
633
677
 
634
678
  function readFieldInstruction(element: XmlElementNode): string | undefined {
635
679
  const instruction =
636
- element.attributes["w:instr"] ??
637
- element.attributes.instr ??
680
+ readStringAttr(element, "w:instr") ??
638
681
  extractTextContent(element);
639
682
  return instruction.trim().length > 0 ? instruction : undefined;
640
683
  }
@@ -653,7 +696,7 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
653
696
  }
654
697
 
655
698
  const name = localName(child.name);
656
- const val = child.attributes["w:val"] ?? child.attributes.val ?? "true";
699
+ const val = readStringAttr(child, "w:val") ?? "true";
657
700
 
658
701
  switch (name) {
659
702
  case "b":
@@ -693,7 +736,7 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
693
736
  break;
694
737
  }
695
738
  case "sz": {
696
- const szVal = child.attributes["w:val"] ?? child.attributes.val;
739
+ const szVal = readStringAttr(child, "w:val");
697
740
  if (szVal) {
698
741
  const size = Number.parseInt(szVal, 10);
699
742
  if (Number.isFinite(size) && size > 0) {
@@ -703,7 +746,7 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
703
746
  break;
704
747
  }
705
748
  case "color": {
706
- const colorVal = child.attributes["w:val"] ?? child.attributes.val;
749
+ const colorVal = readStringAttr(child, "w:val");
707
750
  // A.9: preserve "auto" verbatim for round-trip.
708
751
  if (colorVal) {
709
752
  marks.push({ type: "textColor", color: colorVal });
@@ -711,7 +754,7 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
711
754
  break;
712
755
  }
713
756
  case "shd": {
714
- const fill = child.attributes["w:fill"] ?? child.attributes.fill;
757
+ const fill = readStringAttr(child, "w:fill");
715
758
  if (fill && fill !== "auto") {
716
759
  marks.push({ type: "backgroundColor", color: fill });
717
760
  }
@@ -719,7 +762,7 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
719
762
  }
720
763
  case "highlight": {
721
764
  const resolvedHighlight = resolveHighlightColor(
722
- child.attributes["w:val"] ?? child.attributes.val,
765
+ readStringAttr(child, "w:val"),
723
766
  );
724
767
  if (resolvedHighlight) {
725
768
  marks.push({
@@ -750,13 +793,13 @@ function readParagraphSpacing(pPr: XmlElementNode): ParagraphSpacing | undefined
750
793
  const spacingNode = findChildElementOptional(pPr, "spacing");
751
794
  if (!spacingNode) return undefined;
752
795
  const result: ParagraphSpacing = {};
753
- const before = spacingNode.attributes["w:before"] ?? spacingNode.attributes.before;
796
+ const before = readStringAttr(spacingNode, "w:before");
754
797
  if (before) result.before = Number.parseInt(before, 10);
755
- const after = spacingNode.attributes["w:after"] ?? spacingNode.attributes.after;
798
+ const after = readStringAttr(spacingNode, "w:after");
756
799
  if (after) result.after = Number.parseInt(after, 10);
757
- const line = spacingNode.attributes["w:line"] ?? spacingNode.attributes.line;
800
+ const line = readStringAttr(spacingNode, "w:line");
758
801
  if (line) result.line = Number.parseInt(line, 10);
759
- const lineRule = spacingNode.attributes["w:lineRule"] ?? spacingNode.attributes.lineRule;
802
+ const lineRule = readStringAttr(spacingNode, "w:lineRule");
760
803
  if (lineRule === "auto" || lineRule === "exact" || lineRule === "atLeast") {
761
804
  result.lineRule = lineRule;
762
805
  }
@@ -767,13 +810,13 @@ function readParagraphIndentation(pPr: XmlElementNode): ParagraphIndentation | u
767
810
  const indNode = findChildElementOptional(pPr, "ind");
768
811
  if (!indNode) return undefined;
769
812
  const result: ParagraphIndentation = {};
770
- const left = indNode.attributes["w:left"] ?? indNode.attributes.left;
813
+ const left = readStringAttr(indNode, "w:left");
771
814
  if (left) result.left = Number.parseInt(left, 10);
772
- const right = indNode.attributes["w:right"] ?? indNode.attributes.right;
815
+ const right = readStringAttr(indNode, "w:right");
773
816
  if (right) result.right = Number.parseInt(right, 10);
774
- const firstLine = indNode.attributes["w:firstLine"] ?? indNode.attributes.firstLine;
817
+ const firstLine = readStringAttr(indNode, "w:firstLine");
775
818
  if (firstLine) result.firstLine = Number.parseInt(firstLine, 10);
776
- const hanging = indNode.attributes["w:hanging"] ?? indNode.attributes.hanging;
819
+ const hanging = readStringAttr(indNode, "w:hanging");
777
820
  if (hanging) result.hanging = Number.parseInt(hanging, 10);
778
821
  return Object.keys(result).length > 0 ? result : undefined;
779
822
  }
@@ -785,30 +828,18 @@ function readParagraphTabStops(pPr: XmlElementNode): TabStop[] | undefined {
785
828
  const tabStops: TabStop[] = [];
786
829
  for (const child of tabsNode.children) {
787
830
  if (child.type !== "element" || localName(child.name) !== "tab") continue;
788
- const pos = child.attributes["w:pos"] ?? child.attributes.pos;
789
- const val = (child.attributes["w:val"] ?? child.attributes.val ?? "left").toLowerCase();
790
- const leader = (child.attributes["w:leader"] ?? child.attributes.leader ?? "none").toLowerCase();
831
+ const pos = readStringAttr(child, "w:pos");
832
+ const val = (readStringAttr(child, "w:val") ?? "left").toLowerCase();
833
+ const leader = (readStringAttr(child, "w:leader") ?? "none").toLowerCase();
791
834
 
792
835
  if (pos === undefined) continue;
793
836
  const position = Number.parseInt(pos, 10);
794
837
  if (!Number.isFinite(position)) continue;
795
838
 
796
- const align = (["left", "center", "right", "decimal", "bar", "clear"] as const).includes(
797
- val as "left" | "center" | "right" | "decimal" | "bar" | "clear",
798
- )
799
- ? (val as TabStop["align"])
800
- : "left";
801
-
802
- const leaderValue =
803
- leader === "none" ||
804
- leader === "dot" ||
805
- leader === "hyphen" ||
806
- leader === "underscore" ||
807
- leader === "heavy"
808
- ? (leader as Exclude<TabStop["leader"], "middleDot">)
809
- : leader === "middledot"
810
- ? "middleDot"
811
- : undefined;
839
+ const alignCandidate = val as TabStop["align"];
840
+ const align: TabStop["align"] = TAB_ALIGN_VOCAB.has(alignCandidate) ? alignCandidate : "left";
841
+ const leaderNorm = leader === "middledot" ? "middleDot" : (leader as TabStop["leader"]);
842
+ const leaderValue: TabStop["leader"] | undefined = TAB_LEADER_VOCAB.has(leaderNorm) ? leaderNorm : undefined;
812
843
 
813
844
  tabStops.push({
814
845
  position,
@@ -857,10 +888,6 @@ function findFirstDescendant(
857
888
  return undefined;
858
889
  }
859
890
 
860
- function localName(name: string): string {
861
- const separatorIndex = name.indexOf(":");
862
- return separatorIndex >= 0 ? name.slice(separatorIndex + 1) : name;
863
- }
864
891
 
865
892
  // ---- Simple secondary-story table support ----
866
893
 
@@ -895,8 +922,7 @@ function containsRiskyElement(element: XmlElementNode): boolean {
895
922
  const name = localName(child.name);
896
923
  if (name === "fldSimple" || name === "instrText") {
897
924
  const instruction =
898
- child.attributes["w:instr"] ??
899
- child.attributes.instr ??
925
+ readStringAttr(child, "w:instr") ??
900
926
  extractTextContent(child);
901
927
  const classification = classifyFieldInstruction(instruction);
902
928
  if (!isSafeSecondaryStoryFieldFamily(classification.family)) {
@@ -932,7 +958,7 @@ function isSafeSecondaryStoryFieldFamily(family: string): boolean {
932
958
  );
933
959
  }
934
960
 
935
- function parseSimpleTableElement(tblElement: XmlElementNode): TableNode {
961
+ function parseSimpleTableElement(tblElement: XmlElementNode, sourceXml: string): TableNode {
936
962
  let gridColumns: number[] = [];
937
963
  const rows: TableRowNode[] = [];
938
964
  let propertiesXml: string | undefined;
@@ -972,7 +998,7 @@ function parseSimpleTableElement(tblElement: XmlElementNode): TableNode {
972
998
  } else if (name === "tblGrid") {
973
999
  gridColumns = readGridColumns(child);
974
1000
  } else if (name === "tr") {
975
- rows.push(parseSimpleTableRow(child));
1001
+ rows.push(parseSimpleTableRow(child, sourceXml));
976
1002
  }
977
1003
  }
978
1004
 
@@ -1001,7 +1027,7 @@ function readGridColumns(tblGrid: XmlElementNode): number[] {
1001
1027
  return readSharedGridColumns(tblGrid);
1002
1028
  }
1003
1029
 
1004
- function parseSimpleTableRow(trElement: XmlElementNode): TableRowNode {
1030
+ function parseSimpleTableRow(trElement: XmlElementNode, sourceXml: string): TableRowNode {
1005
1031
  const cells: TableCellNode[] = [];
1006
1032
  let propertiesXml: string | undefined;
1007
1033
  let height: TableRowNode["height"];
@@ -1024,7 +1050,7 @@ function parseSimpleTableRow(trElement: XmlElementNode): TableRowNode {
1024
1050
  horizontalAlignment = readRowHorizontalAlignment(child);
1025
1051
  cnfStyle = readRowCnfStyle(child);
1026
1052
  } else if (name === "tc") {
1027
- cells.push(parseSimpleTableCell(child));
1053
+ cells.push(parseSimpleTableCell(child, sourceXml));
1028
1054
  }
1029
1055
  }
1030
1056
 
@@ -1041,7 +1067,7 @@ function parseSimpleTableRow(trElement: XmlElementNode): TableRowNode {
1041
1067
  };
1042
1068
  }
1043
1069
 
1044
- function parseSimpleTableCell(tcElement: XmlElementNode): TableCellNode {
1070
+ function parseSimpleTableCell(tcElement: XmlElementNode, sourceXml: string): TableCellNode {
1045
1071
  const children: BlockNode[] = [];
1046
1072
  let propertiesXml: string | undefined;
1047
1073
  let gridSpan: number | undefined;
@@ -1068,7 +1094,7 @@ function parseSimpleTableCell(tcElement: XmlElementNode): TableCellNode {
1068
1094
 
1069
1095
  const vmEl = findChildElementOptional(child, "vMerge");
1070
1096
  if (vmEl) {
1071
- const vmVal = vmEl.attributes["w:val"] ?? vmEl.attributes.val ?? "continue";
1097
+ const vmVal = readStringAttr(vmEl, "w:val") ?? "continue";
1072
1098
  verticalMerge = vmVal === "restart" ? "restart" : "continue";
1073
1099
  }
1074
1100
  width = readCellWidth(child);
@@ -1081,7 +1107,7 @@ function parseSimpleTableCell(tcElement: XmlElementNode): TableCellNode {
1081
1107
  margins = readCellMargins(child);
1082
1108
  cnfStyle = readCellCnfStyle(child);
1083
1109
  } else if (name === "p") {
1084
- children.push(parseParagraphElement(child));
1110
+ children.push(parseParagraphElement(child, sourceXml));
1085
1111
  }
1086
1112
  }
1087
1113
 
@@ -1138,135 +1164,3 @@ function escapeXmlText(text: string): string {
1138
1164
  .replace(/</g, "&lt;")
1139
1165
  .replace(/>/g, "&gt;");
1140
1166
  }
1141
-
1142
- // ---- Minimal XML parser (same pattern as parse-numbering.ts) ----
1143
-
1144
- function parseXml(xml: string): XmlElementNode {
1145
- currentSourceXml = xml;
1146
- const root: XmlElementNode = {
1147
- type: "element",
1148
- name: "__root__",
1149
- attributes: {},
1150
- children: [],
1151
- start: 0,
1152
- end: xml.length,
1153
- };
1154
- const stack: XmlElementNode[] = [root];
1155
- let cursor = 0;
1156
-
1157
- while (cursor < xml.length) {
1158
- if (xml.startsWith("<!--", cursor)) {
1159
- const end = xml.indexOf("-->", cursor);
1160
- cursor = end >= 0 ? end + 3 : xml.length;
1161
- continue;
1162
- }
1163
-
1164
- if (xml.startsWith("<?", cursor)) {
1165
- const end = xml.indexOf("?>", cursor);
1166
- cursor = end >= 0 ? end + 2 : xml.length;
1167
- continue;
1168
- }
1169
-
1170
- if (xml.startsWith("<![CDATA[", cursor)) {
1171
- const end = xml.indexOf("]]>", cursor);
1172
- const textEnd = end >= 0 ? end : xml.length;
1173
- stack[stack.length - 1]?.children.push({
1174
- type: "text",
1175
- text: xml.slice(cursor + 9, textEnd),
1176
- start: cursor,
1177
- end: end >= 0 ? end + 3 : xml.length,
1178
- });
1179
- cursor = end >= 0 ? end + 3 : xml.length;
1180
- continue;
1181
- }
1182
-
1183
- if (xml[cursor] !== "<") {
1184
- const nextTag = xml.indexOf("<", cursor);
1185
- const end = nextTag >= 0 ? nextTag : xml.length;
1186
- const text = decodeXmlEntities(xml.slice(cursor, end));
1187
- if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
1188
- stack[stack.length - 1]?.children.push({ type: "text", text, start: cursor, end });
1189
- }
1190
- cursor = end;
1191
- continue;
1192
- }
1193
-
1194
- // Closing tag
1195
- if (xml[cursor + 1] === "/") {
1196
- const end = xml.indexOf(">", cursor);
1197
- if (end < 0) {
1198
- break;
1199
- }
1200
- const current = stack.pop();
1201
- if (current) {
1202
- current.end = end + 1;
1203
- }
1204
- cursor = end + 1;
1205
- continue;
1206
- }
1207
-
1208
- // Open or self-closing tag
1209
- const tagEnd = xml.indexOf(">", cursor);
1210
- if (tagEnd < 0) {
1211
- break;
1212
- }
1213
-
1214
- const tagContent = xml.slice(cursor + 1, tagEnd);
1215
- const selfClosing = tagContent.endsWith("/");
1216
- const normalized = selfClosing ? tagContent.slice(0, -1).trimEnd() : tagContent;
1217
-
1218
- const spaceIndex = normalized.search(/\s/);
1219
- const tagName =
1220
- spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
1221
- const attrString =
1222
- spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
1223
- const attributes = parseAttributes(attrString);
1224
-
1225
- const element: XmlElementNode = {
1226
- type: "element",
1227
- name: tagName,
1228
- attributes,
1229
- children: [],
1230
- start: cursor,
1231
- end: tagEnd + 1,
1232
- };
1233
-
1234
- stack[stack.length - 1]?.children.push(element);
1235
-
1236
- if (!selfClosing) {
1237
- stack.push(element);
1238
- }
1239
-
1240
- cursor = tagEnd + 1;
1241
- }
1242
-
1243
- return root;
1244
- }
1245
-
1246
- function parseAttributes(attrString: string): Record<string, string> {
1247
- const attrs: Record<string, string> = {};
1248
- const pattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
1249
-
1250
- for (const match of attrString.matchAll(pattern)) {
1251
- const name = match[1];
1252
- const value = match[3] ?? match[4] ?? "";
1253
- if (name) {
1254
- attrs[name] = decodeXmlEntities(value);
1255
- }
1256
- }
1257
-
1258
- return attrs;
1259
- }
1260
-
1261
- function decodeXmlEntities(text: string): string {
1262
- return text
1263
- .replace(/&amp;/g, "&")
1264
- .replace(/&lt;/g, "<")
1265
- .replace(/&gt;/g, ">")
1266
- .replace(/&quot;/g, '"')
1267
- .replace(/&apos;/g, "'")
1268
- .replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number.parseInt(dec, 10)))
1269
- .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) =>
1270
- String.fromCodePoint(Number.parseInt(hex, 16)),
1271
- );
1272
- }