@beyondwork/docx-react-component 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/README.md +8 -2
  2. package/package.json +24 -34
  3. package/src/api/README.md +5 -1
  4. package/src/api/public-types.ts +710 -4
  5. package/src/api/session-state.ts +60 -0
  6. package/src/core/commands/formatting-commands.ts +2 -1
  7. package/src/core/commands/image-commands.ts +147 -0
  8. package/src/core/commands/index.ts +19 -3
  9. package/src/core/commands/list-commands.ts +231 -36
  10. package/src/core/commands/paragraph-layout-commands.ts +339 -0
  11. package/src/core/commands/section-layout-commands.ts +680 -0
  12. package/src/core/commands/style-commands.ts +262 -0
  13. package/src/core/search/search-text.ts +357 -0
  14. package/src/core/selection/mapping.ts +41 -0
  15. package/src/core/state/editor-state.ts +4 -1
  16. package/src/index.ts +51 -0
  17. package/src/io/docx-session.ts +623 -56
  18. package/src/io/export/serialize-comments.ts +104 -34
  19. package/src/io/export/serialize-footnotes.ts +198 -1
  20. package/src/io/export/serialize-headers-footers.ts +203 -10
  21. package/src/io/export/serialize-main-document.ts +285 -8
  22. package/src/io/export/serialize-numbering.ts +28 -7
  23. package/src/io/export/split-review-boundaries.ts +181 -19
  24. package/src/io/normalize/normalize-text.ts +144 -32
  25. package/src/io/ooxml/highlight-colors.ts +39 -0
  26. package/src/io/ooxml/numbering-sentinels.ts +44 -0
  27. package/src/io/ooxml/parse-comments.ts +85 -19
  28. package/src/io/ooxml/parse-fields.ts +396 -0
  29. package/src/io/ooxml/parse-footnotes.ts +452 -22
  30. package/src/io/ooxml/parse-headers-footers.ts +657 -29
  31. package/src/io/ooxml/parse-inline-media.ts +30 -0
  32. package/src/io/ooxml/parse-main-document.ts +807 -20
  33. package/src/io/ooxml/parse-numbering.ts +7 -0
  34. package/src/io/ooxml/parse-revisions.ts +317 -38
  35. package/src/io/ooxml/parse-settings.ts +184 -0
  36. package/src/io/ooxml/parse-shapes.ts +25 -0
  37. package/src/io/ooxml/parse-styles.ts +463 -0
  38. package/src/io/ooxml/parse-theme.ts +32 -0
  39. package/src/legal/bookmarks.ts +44 -0
  40. package/src/legal/cross-references.ts +59 -1
  41. package/src/model/canonical-document.ts +250 -4
  42. package/src/model/cds-1.0.0.ts +13 -0
  43. package/src/model/snapshot.ts +87 -2
  44. package/src/review/store/revision-store.ts +6 -0
  45. package/src/review/store/revision-types.ts +1 -0
  46. package/src/runtime/document-layout.ts +332 -0
  47. package/src/runtime/document-navigation.ts +603 -0
  48. package/src/runtime/document-runtime.ts +1754 -78
  49. package/src/runtime/document-search.ts +145 -0
  50. package/src/runtime/numbering-prefix.ts +47 -26
  51. package/src/runtime/page-layout-estimation.ts +212 -0
  52. package/src/runtime/read-only-diagnostics-runtime.ts +9 -0
  53. package/src/runtime/session-capabilities.ts +35 -3
  54. package/src/runtime/story-context.ts +164 -0
  55. package/src/runtime/story-targeting.ts +162 -0
  56. package/src/runtime/surface-projection.ts +324 -36
  57. package/src/runtime/table-schema.ts +89 -7
  58. package/src/runtime/view-state.ts +477 -0
  59. package/src/runtime/workflow-markup.ts +349 -0
  60. package/src/ui/WordReviewEditor.tsx +2469 -1344
  61. package/src/ui/browser-export.ts +52 -0
  62. package/src/ui/editor-command-bag.ts +120 -0
  63. package/src/ui/editor-runtime-boundary.ts +1422 -0
  64. package/src/ui/editor-shell-view.tsx +134 -0
  65. package/src/ui/editor-surface-controller.tsx +51 -0
  66. package/src/ui/headless/preserve-editor-selection.ts +5 -0
  67. package/src/ui/headless/revision-decoration-model.ts +4 -4
  68. package/src/ui/headless/selection-helpers.ts +20 -0
  69. package/src/ui/headless/selection-toolbar-model.ts +22 -0
  70. package/src/ui/headless/use-editor-keyboard.ts +6 -1
  71. package/src/ui/runtime-snapshot-selectors.ts +197 -0
  72. package/src/ui-tailwind/chrome/tw-alert-banner.tsx +18 -2
  73. package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +129 -0
  74. package/src/ui-tailwind/chrome/tw-layout-panel.tsx +114 -0
  75. package/src/ui-tailwind/chrome/tw-object-context-toolbar.tsx +34 -0
  76. package/src/ui-tailwind/chrome/tw-page-ruler.tsx +386 -0
  77. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +150 -14
  78. package/src/ui-tailwind/chrome/tw-table-context-toolbar.tsx +128 -0
  79. package/src/ui-tailwind/editor-surface/perf-probe.ts +179 -0
  80. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +46 -7
  81. package/src/ui-tailwind/editor-surface/pm-contextual-ui.ts +31 -0
  82. package/src/ui-tailwind/editor-surface/pm-decorations.ts +35 -0
  83. package/src/ui-tailwind/editor-surface/pm-position-map.ts +3 -3
  84. package/src/ui-tailwind/editor-surface/pm-schema.ts +186 -13
  85. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +191 -68
  86. package/src/ui-tailwind/editor-surface/search-plugin.ts +19 -68
  87. package/src/ui-tailwind/editor-surface/surface-build-keys.ts +51 -0
  88. package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +11 -0
  89. package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +7 -1
  90. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +528 -85
  91. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +0 -1
  92. package/src/ui-tailwind/index.ts +2 -1
  93. package/src/ui-tailwind/page-chrome-model.ts +27 -0
  94. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +277 -147
  95. package/src/ui-tailwind/review/tw-health-panel.tsx +31 -2
  96. package/src/ui-tailwind/review/tw-review-rail.tsx +8 -8
  97. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +15 -15
  98. package/src/ui-tailwind/theme/editor-theme.css +127 -0
  99. package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +4 -0
  100. package/src/ui-tailwind/toolbar/tw-toolbar.tsx +829 -12
  101. package/src/ui-tailwind/tw-review-workspace.tsx +1238 -42
  102. package/src/validation/compatibility-engine.ts +119 -24
  103. package/src/validation/compatibility-report.ts +1 -0
  104. package/src/validation/diagnostics.ts +1 -0
  105. package/src/validation/docx-comment-proof.ts +707 -0
@@ -3,9 +3,18 @@ import type {
3
3
  FootnoteRefNode,
4
4
  HeaderFooterVariant,
5
5
  InlineNode,
6
+ ParagraphIndentation,
6
7
  ParagraphNode,
8
+ ParagraphSpacing,
9
+ TabStop,
10
+ TableCellNode,
11
+ TableNode,
12
+ TableRowNode,
7
13
  TextMark,
8
14
  } from "../../model/canonical-document.ts";
15
+ import { resolveHighlightColor } from "./highlight-colors.ts";
16
+ import { classifyFieldInstruction } from "./parse-fields.ts";
17
+ import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
9
18
 
10
19
  // ---- Public types ----
11
20
 
@@ -13,6 +22,7 @@ export interface ParsedHeaderFooterReference {
13
22
  variant: HeaderFooterVariant;
14
23
  relationshipId: string;
15
24
  kind: "header" | "footer";
25
+ sectionIndex?: number;
16
26
  }
17
27
 
18
28
  export interface ParsedHeaderFooterDocument {
@@ -26,15 +36,21 @@ interface XmlElementNode {
26
36
  name: string;
27
37
  attributes: Record<string, string>;
28
38
  children: XmlNode[];
39
+ start: number;
40
+ end: number;
29
41
  }
30
42
 
31
43
  interface XmlTextNode {
32
44
  type: "text";
33
45
  text: string;
46
+ start: number;
47
+ end: number;
34
48
  }
35
49
 
36
50
  type XmlNode = XmlElementNode | XmlTextNode;
37
51
 
52
+ let currentSourceXml = "";
53
+
38
54
  // ---- Public API ----
39
55
 
40
56
  /**
@@ -83,6 +99,7 @@ function collectSectPrReferences(
83
99
  element: XmlElementNode,
84
100
  refs: ParsedHeaderFooterReference[],
85
101
  ): void {
102
+ let sectionIndex = 0;
86
103
  for (const child of element.children) {
87
104
  if (child.type !== "element") {
88
105
  continue;
@@ -91,14 +108,16 @@ function collectSectPrReferences(
91
108
  const name = localName(child.name);
92
109
 
93
110
  if (name === "sectPr") {
94
- extractSectPrRefs(child, refs);
111
+ // Body-level sectPr is the final section
112
+ extractSectPrRefs(child, refs, sectionIndex);
95
113
  } else if (name === "p") {
96
- // Check paragraph properties for sectPr
114
+ // Check paragraph properties for sectPr (non-final section break)
97
115
  const pPr = findChildElementOptional(child, "pPr");
98
116
  if (pPr) {
99
117
  const sectPr = findChildElementOptional(pPr, "sectPr");
100
118
  if (sectPr) {
101
- extractSectPrRefs(sectPr, refs);
119
+ extractSectPrRefs(sectPr, refs, sectionIndex);
120
+ sectionIndex++;
102
121
  }
103
122
  }
104
123
  }
@@ -108,6 +127,7 @@ function collectSectPrReferences(
108
127
  function extractSectPrRefs(
109
128
  sectPr: XmlElementNode,
110
129
  refs: ParsedHeaderFooterReference[],
130
+ sectionIndex: number,
111
131
  ): void {
112
132
  for (const child of sectPr.children) {
113
133
  if (child.type !== "element") {
@@ -129,11 +149,12 @@ function extractSectPrRefs(
129
149
 
130
150
  if (relationshipId) {
131
151
  // Avoid duplicates (multiple sectPr may reference same header)
152
+ const dedupeKey = `${kind}:${variant}:${relationshipId}`;
132
153
  const alreadyAdded = refs.some(
133
- (ref) => ref.relationshipId === relationshipId && ref.kind === kind,
154
+ (ref) => `${ref.kind}:${ref.variant}:${ref.relationshipId}` === dedupeKey,
134
155
  );
135
156
  if (!alreadyAdded) {
136
- refs.push({ variant, relationshipId, kind });
157
+ refs.push({ variant, relationshipId, kind, sectionIndex });
137
158
  }
138
159
  }
139
160
  }
@@ -172,18 +193,25 @@ function parseHdrFtrXml(
172
193
  if (name === "p") {
173
194
  blocks.push(parseParagraphElement(child));
174
195
  } else if (name === "tbl") {
175
- // Table in header/footer: store as opaque to preserve fidelity
176
- blocks.push({
177
- type: "opaque_block",
178
- fragmentId: "fragment:hdrftr-tbl",
179
- warningId: "warning:hdrftr-opaque-table",
180
- });
196
+ // Simple tables (no revisions, fields, or nested tables) are promoted
197
+ // to supported-roundtrip; structurally risky tables stay opaque.
198
+ if (isSimpleSecondaryStoryTable(child)) {
199
+ blocks.push(parseSimpleTableElement(child));
200
+ } else {
201
+ blocks.push({
202
+ type: "opaque_block",
203
+ fragmentId: "fragment:hdrftr-tbl",
204
+ warningId: "warning:hdrftr-opaque-table",
205
+ rawXml: serializeElementToXml(child),
206
+ });
207
+ }
181
208
  } else {
182
209
  // Other block-level elements: treat as opaque
183
210
  blocks.push({
184
211
  type: "opaque_block",
185
212
  fragmentId: "fragment:hdrftr-opaque",
186
213
  warningId: "warning:hdrftr-opaque-block",
214
+ rawXml: serializeElementToXml(child),
187
215
  });
188
216
  }
189
217
  }
@@ -194,7 +222,15 @@ function parseHdrFtrXml(
194
222
  function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
195
223
  let styleId: string | undefined;
196
224
  let alignment: ParagraphNode["alignment"];
225
+ let spacing: ParagraphNode["spacing"];
226
+ let indentation: ParagraphNode["indentation"];
227
+ let tabStops: ParagraphNode["tabStops"];
197
228
  const children: InlineNode[] = [];
229
+ let activeComplexField: {
230
+ instruction: string;
231
+ children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
232
+ mode: "instruction" | "result";
233
+ } | null = null;
198
234
 
199
235
  for (const child of pElement.children) {
200
236
  if (child.type !== "element") {
@@ -211,25 +247,121 @@ function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
211
247
  if (jcVal === "left" || jcVal === "center" || jcVal === "right" || jcVal === "both" || jcVal === "distribute") {
212
248
  alignment = jcVal;
213
249
  }
250
+ spacing = readParagraphSpacing(child);
251
+ indentation = readParagraphIndentation(child);
252
+ tabStops = readParagraphTabStops(child);
214
253
  } else if (name === "r") {
215
- children.push(...parseRunElement(child));
254
+ activeComplexField = appendRunNodes(child, children, activeComplexField);
216
255
  } else if (name === "hyperlink") {
217
256
  children.push(parseHyperlinkElement(child));
218
257
  } else if (name === "bookmarkStart" || name === "bookmarkEnd") {
219
258
  children.push(parseBookmarkElement(child));
220
259
  } else if (name === "fldSimple") {
221
- children.push(parseFieldElement(child));
260
+ if (activeComplexField && activeComplexField.instruction.trim().length > 0) {
261
+ children.push({
262
+ type: "field",
263
+ fieldType: "complex",
264
+ instruction: activeComplexField.instruction,
265
+ children: activeComplexField.children,
266
+ });
267
+ activeComplexField = null;
268
+ }
269
+ pushFieldNode(children, child, "simple");
222
270
  }
223
271
  }
224
272
 
273
+ if (activeComplexField && activeComplexField.instruction.trim().length > 0) {
274
+ children.push({
275
+ type: "field",
276
+ fieldType: "complex",
277
+ instruction: activeComplexField.instruction,
278
+ children: activeComplexField.children,
279
+ });
280
+ }
281
+
225
282
  return {
226
283
  type: "paragraph",
227
284
  ...(styleId ? { styleId } : {}),
228
285
  ...(alignment ? { alignment } : {}),
286
+ ...(spacing ? { spacing } : {}),
287
+ ...(indentation ? { indentation } : {}),
288
+ ...(tabStops && tabStops.length > 0 ? { tabStops } : {}),
229
289
  children,
230
290
  };
231
291
  }
232
292
 
293
/**
 * Walks the element children of a single run and appends the resulting
 * inline nodes to `nodes`, while tracking complex-field state that may span
 * several runs.
 *
 * @param rElement - The run element whose children are processed.
 * @param nodes - Output list; receives plain inline nodes and finished fields.
 * @param activeComplexField - Field state carried over from earlier runs, or
 *   `null` when no complex field is open.
 * @returns The field state after this run, to be passed to the next call.
 */
function appendRunNodes(
  rElement: XmlElementNode,
  nodes: InlineNode[],
  activeComplexField: {
    instruction: string;
    children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
    mode: "instruction" | "result";
  } | null,
): {
  instruction: string;
  children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
  mode: "instruction" | "result";
} | null {
  const runMarks: TextMark[] = parseRunProperties(rElement);
  let field = activeComplexField;

  for (const runChild of rElement.children) {
    if (runChild.type !== "element") {
      continue;
    }

    switch (localName(runChild.name)) {
      case "fldChar": {
        const charType =
          runChild.attributes["w:fldCharType"] ?? runChild.attributes.fldCharType;
        if (charType === "begin") {
          // A new "begin" always starts a fresh accumulator.
          field = { instruction: "", children: [], mode: "instruction" };
        } else if (field && charType === "separate") {
          field.mode = "result";
        } else if (field && charType === "end") {
          // Fields without a non-blank instruction are dropped.
          if (field.instruction.trim().length > 0) {
            nodes.push({
              type: "field",
              fieldType: "complex",
              instruction: field.instruction,
              children: field.children,
            });
          }
          field = null;
        }
        break;
      }

      case "instrText": {
        if (field) {
          field.instruction += extractTextContent(runChild);
        } else {
          // Instruction text outside any open field becomes its own node.
          pushFieldNode(nodes, runChild, "complex");
        }
        break;
      }

      default: {
        const parsed = parseRunChildNode(runChild, runMarks);
        if (!parsed) {
          break;
        }
        if (field && field.mode === "result") {
          // Only text, hard breaks and tabs are kept as field result content.
          if (
            parsed.type === "text" ||
            parsed.type === "hard_break" ||
            parsed.type === "tab"
          ) {
            field.children.push(parsed);
          }
        } else {
          nodes.push(parsed);
        }
      }
    }
  }

  return field;
}
364
+
233
365
  function parseRunElement(rElement: XmlElementNode): InlineNode[] {
234
366
  const nodes: InlineNode[] = [];
235
367
  const marks: TextMark[] = parseRunProperties(rElement);
@@ -278,14 +410,138 @@ function parseRunElement(rElement: XmlElementNode): InlineNode[] {
278
410
  }
279
411
  } else if (name === "bookmarkStart" || name === "bookmarkEnd") {
280
412
  nodes.push(parseBookmarkElement(child));
281
- } else if (name === "fldChar" || name === "instrText") {
282
- nodes.push(parseFieldElement(child));
413
+ } else if (name === "instrText") {
414
+ pushFieldNode(nodes, child, "complex");
415
+ } else if (name === "drawing") {
416
+ const drawingXml = currentSourceXml.slice(child.start, child.end);
417
+ const shapeResult = parseShapeXml(drawingXml);
418
+ if (shapeResult) {
419
+ nodes.push(shapeResult);
420
+ }
421
+ } else if (name === "pict") {
422
+ const pictXml = currentSourceXml.slice(child.start, child.end);
423
+ const vmlResult = parseVmlXml(pictXml);
424
+ if (vmlResult) {
425
+ nodes.push(vmlResult);
426
+ }
427
+ } else if (name === "AlternateContent") {
428
+ const drawingNode = findFirstDescendant(child, "drawing");
429
+ if (drawingNode) {
430
+ const drawingXml = currentSourceXml.slice(drawingNode.start, drawingNode.end);
431
+ const shapeResult = parseShapeXml(drawingXml);
432
+ if (shapeResult) {
433
+ nodes.push({
434
+ ...shapeResult,
435
+ rawXml: currentSourceXml.slice(child.start, child.end),
436
+ });
437
+ continue;
438
+ }
439
+ }
440
+ const pictNode = findFirstDescendant(child, "pict");
441
+ if (pictNode) {
442
+ const pictXml = currentSourceXml.slice(pictNode.start, pictNode.end);
443
+ const vmlResult = parseVmlXml(pictXml);
444
+ if (vmlResult) {
445
+ nodes.push({
446
+ ...vmlResult,
447
+ rawXml: currentSourceXml.slice(child.start, child.end),
448
+ });
449
+ }
450
+ }
283
451
  }
284
452
  }
285
453
 
286
454
  return nodes;
287
455
  }
288
456
 
457
/**
 * Converts one child element of a run into an inline node.
 *
 * @param child - The run child element (`t`, `br`, `tab`, note references,
 *   bookmarks, `drawing`, `pict`, or `AlternateContent`).
 * @param marks - Marks attached to text nodes when the list is non-empty.
 * @returns The inline node, or `null` when the element is not handled or
 *   yields no content.
 */
function parseRunChildNode(
  child: XmlElementNode,
  marks: TextMark[],
): InlineNode | null {
  // Raw source text of an element, taken from the XML currently being parsed.
  const sourceOf = (node: XmlElementNode): string =>
    currentSourceXml.slice(node.start, node.end);

  // Builds a note reference from the element's id; a missing id yields null.
  const noteReference = (noteKind: "footnote" | "endnote"): FootnoteRefNode | null => {
    const noteId = child.attributes["w:id"] ?? child.attributes.id ?? "";
    if (!noteId) {
      return null;
    }
    const ref: FootnoteRefNode = { type: "footnote_ref", noteId, noteKind };
    return ref;
  };

  switch (localName(child.name)) {
    case "t": {
      const text = extractTextContent(child);
      if (text.length === 0) {
        return null;
      }
      return {
        type: "text",
        text,
        ...(marks.length > 0 ? { marks } : {}),
      };
    }
    case "br":
      return { type: "hard_break" };
    case "tab":
      return { type: "tab" };
    case "footnoteReference":
      return noteReference("footnote");
    case "endnoteReference":
      return noteReference("endnote");
    case "bookmarkStart":
    case "bookmarkEnd":
      return parseBookmarkElement(child);
    case "drawing":
      return parseShapeXml(sourceOf(child));
    case "pict":
      return parseVmlXml(sourceOf(child));
    case "AlternateContent": {
      // Try the first nested drawing, then the first nested pict; the raw XML
      // of the whole AlternateContent wrapper is attached to the result.
      const drawingNode = findFirstDescendant(child, "drawing");
      if (drawingNode) {
        const shapeResult = parseShapeXml(sourceOf(drawingNode));
        if (shapeResult) {
          return { ...shapeResult, rawXml: sourceOf(child) };
        }
      }
      const pictNode = findFirstDescendant(child, "pict");
      if (pictNode) {
        const vmlResult = parseVmlXml(sourceOf(pictNode));
        if (vmlResult) {
          return { ...vmlResult, rawXml: sourceOf(child) };
        }
      }
      return null;
    }
    default:
      return null;
  }
}
544
+
289
545
  function parseHyperlinkElement(element: XmlElementNode): Extract<InlineNode, { type: "hyperlink" }> {
290
546
  const href = element.attributes["w:anchor"]
291
547
  ? `#${element.attributes["w:anchor"]}`
@@ -327,22 +583,30 @@ function parseBookmarkElement(
327
583
  };
328
584
  }
329
585
 
330
- function parseFieldElement(element: XmlElementNode): Extract<InlineNode, { type: "field" }> {
331
- const rawFieldType =
332
- element.attributes["w:fldCharType"] ??
333
- element.attributes.fldCharType ??
334
- localName(element.name);
335
- const fieldType: "simple" | "complex" = rawFieldType === "complex" ? "complex" : "simple";
336
- const instruction =
337
- element.attributes["w:instr"] ??
338
- element.attributes.instr ??
339
- extractTextContent(element);
340
- return {
586
/**
 * Appends a childless field node for `element` to `nodes`.
 * Nothing is appended when no usable instruction can be read from the element.
 *
 * @param nodes - Output list of inline nodes.
 * @param element - Element carrying the field instruction.
 * @param fieldType - Field flavour recorded on the emitted node.
 */
function pushFieldNode(
  nodes: InlineNode[],
  element: XmlElementNode,
  fieldType: "simple" | "complex",
): void {
  const instruction = readFieldInstruction(element);
  if (instruction) {
    nodes.push({
      type: "field",
      fieldType,
      instruction,
      children: [],
    });
  }
}
603
+
604
/**
 * Reads a field instruction from the `w:instr` / `instr` attribute, falling
 * back to the element's text content.
 *
 * @returns The instruction exactly as found (not trimmed), or `undefined`
 *   when it is empty or whitespace-only.
 */
function readFieldInstruction(element: XmlElementNode): string | undefined {
  const { attributes } = element;
  const raw = attributes["w:instr"] ?? attributes.instr ?? extractTextContent(element);
  if (raw.trim() === "") {
    return undefined;
  }
  return raw;
}
347
611
 
348
612
  function parseRunProperties(rElement: XmlElementNode): TextMark[] {
@@ -387,12 +651,144 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
387
651
  marks.push({ type: "doubleStrikethrough" });
388
652
  }
389
653
  break;
654
+ case "rFonts": {
655
+ const family =
656
+ child.attributes["w:ascii"] ??
657
+ child.attributes["w:hAnsi"] ??
658
+ child.attributes.ascii ??
659
+ child.attributes.hAnsi;
660
+ if (family) {
661
+ marks.push({ type: "fontFamily", val: family });
662
+ }
663
+ break;
664
+ }
665
+ case "sz": {
666
+ const szVal = child.attributes["w:val"] ?? child.attributes.val;
667
+ if (szVal) {
668
+ const size = Number.parseInt(szVal, 10);
669
+ if (Number.isFinite(size) && size > 0) {
670
+ marks.push({ type: "fontSize", val: size });
671
+ }
672
+ }
673
+ break;
674
+ }
675
+ case "color": {
676
+ const colorVal = child.attributes["w:val"] ?? child.attributes.val;
677
+ if (colorVal && colorVal !== "auto") {
678
+ marks.push({ type: "textColor", color: colorVal });
679
+ }
680
+ break;
681
+ }
682
+ case "shd": {
683
+ const fill = child.attributes["w:fill"] ?? child.attributes.fill;
684
+ if (fill && fill !== "auto") {
685
+ marks.push({ type: "backgroundColor", color: fill });
686
+ }
687
+ break;
688
+ }
689
+ case "highlight": {
690
+ const resolvedHighlight = resolveHighlightColor(
691
+ child.attributes["w:val"] ?? child.attributes.val,
692
+ );
693
+ if (resolvedHighlight) {
694
+ marks.push({
695
+ type: "highlight",
696
+ color: resolvedHighlight.color,
697
+ val: resolvedHighlight.val,
698
+ });
699
+ }
700
+ break;
701
+ }
702
+ case "smallCaps":
703
+ if (val !== "0" && val !== "false") {
704
+ marks.push({ type: "smallCaps" });
705
+ }
706
+ break;
707
+ case "caps":
708
+ if (val !== "0" && val !== "false") {
709
+ marks.push({ type: "allCaps" });
710
+ }
711
+ break;
390
712
  }
391
713
  }
392
714
 
393
715
  return marks;
394
716
  }
395
717
 
718
/**
 * Reads the `spacing` child of a paragraph-properties element.
 *
 * Numeric attributes (`before`, `after`, `line`) are parsed as base-10
 * integers. Values that do not parse to a finite number are skipped instead
 * of being stored as `NaN`.
 *
 * @param pPr - The paragraph-properties element.
 * @returns The spacing found, or `undefined` when there is no `spacing`
 *   element or none of its attributes yields a usable value.
 */
function readParagraphSpacing(pPr: XmlElementNode): ParagraphSpacing | undefined {
  const spacingNode = findChildElementOptional(pPr, "spacing");
  if (!spacingNode) return undefined;

  const attrs = spacingNode.attributes;

  // Prefixed attribute first, then the bare name; empty or unparsable → undefined.
  const readInt = (attrName: string): number | undefined => {
    const raw = attrs[`w:${attrName}`] ?? attrs[attrName];
    if (!raw) return undefined;
    const parsed = Number.parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const result: ParagraphSpacing = {};

  const before = readInt("before");
  if (before !== undefined) result.before = before;

  const after = readInt("after");
  if (after !== undefined) result.after = after;

  const line = readInt("line");
  if (line !== undefined) result.line = line;

  const lineRule = attrs["w:lineRule"] ?? attrs.lineRule;
  if (lineRule === "auto" || lineRule === "exact" || lineRule === "atLeast") {
    result.lineRule = lineRule;
  }

  return Object.keys(result).length > 0 ? result : undefined;
}
734
+
735
/**
 * Reads the `ind` child of a paragraph-properties element.
 *
 * The `left`, `right`, `firstLine` and `hanging` attributes are parsed as
 * base-10 integers. Values that do not parse to a finite number are skipped
 * instead of being stored as `NaN`.
 *
 * @param pPr - The paragraph-properties element.
 * @returns The indentation found, or `undefined` when there is no `ind`
 *   element or none of its attributes yields a usable value.
 */
function readParagraphIndentation(pPr: XmlElementNode): ParagraphIndentation | undefined {
  const indNode = findChildElementOptional(pPr, "ind");
  if (!indNode) return undefined;

  const attrs = indNode.attributes;

  // Prefixed attribute first, then the bare name; empty or unparsable → undefined.
  const readInt = (attrName: string): number | undefined => {
    const raw = attrs[`w:${attrName}`] ?? attrs[attrName];
    if (!raw) return undefined;
    const parsed = Number.parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const result: ParagraphIndentation = {};

  const left = readInt("left");
  if (left !== undefined) result.left = left;

  const right = readInt("right");
  if (right !== undefined) result.right = right;

  const firstLine = readInt("firstLine");
  if (firstLine !== undefined) result.firstLine = firstLine;

  const hanging = readInt("hanging");
  if (hanging !== undefined) result.hanging = hanging;

  return Object.keys(result).length > 0 ? result : undefined;
}
749
+
750
/**
 * Reads the custom tab stops declared in the `tabs` child of a
 * paragraph-properties element.
 *
 * Each `tab` child needs a `pos` attribute that parses to a finite integer;
 * others are skipped. Alignment and leader values are matched
 * case-insensitively. The logical alignments `start` and `end` are mapped to
 * `left` and `right` rather than falling back to `left`. Any other
 * unrecognised alignment becomes `left`. A leader of `none` or an
 * unrecognised leader is omitted from the result.
 *
 * @param pPr - The paragraph-properties element.
 * @returns The tab stops in document order, or `undefined` when there is no
 *   `tabs` element or it yields no usable stop.
 */
function readParagraphTabStops(pPr: XmlElementNode): TabStop[] | undefined {
  const tabsNode = findChildElementOptional(pPr, "tabs");
  if (!tabsNode) return undefined;

  const tabStops: TabStop[] = [];
  for (const child of tabsNode.children) {
    if (child.type !== "element" || localName(child.name) !== "tab") continue;

    const pos = child.attributes["w:pos"] ?? child.attributes.pos;
    if (pos === undefined) continue;
    const position = Number.parseInt(pos, 10);
    if (!Number.isFinite(position)) continue;

    const val = (child.attributes["w:val"] ?? child.attributes.val ?? "left").toLowerCase();
    const leader = (child.attributes["w:leader"] ?? child.attributes.leader ?? "none").toLowerCase();

    let align: TabStop["align"];
    switch (val) {
      case "left":
      case "center":
      case "right":
      case "decimal":
      case "bar":
      case "clear":
        align = val as TabStop["align"];
        break;
      case "start":
        // Logical "start" is treated as left alignment.
        align = "left";
        break;
      case "end":
        // Logical "end" is treated as right alignment.
        align = "right";
        break;
      default:
        align = "left";
    }

    let leaderValue: TabStop["leader"] | undefined;
    switch (leader) {
      case "dot":
      case "hyphen":
      case "underscore":
      case "heavy":
        leaderValue = leader as TabStop["leader"];
        break;
      case "middledot":
        // Input was lower-cased above; the model spells it "middleDot".
        leaderValue = "middleDot";
        break;
      default:
        // "none" and unrecognised leaders are left off the tab stop.
        leaderValue = undefined;
    }

    tabStops.push({
      position,
      align,
      ...(leaderValue ? { leader: leaderValue } : {}),
    });
  }

  return tabStops.length > 0 ? tabStops : undefined;
}
791
+
396
792
  function extractTextContent(tElement: XmlElementNode): string {
397
793
  let text = "";
398
794
  for (const child of tElement.children) {
@@ -413,19 +809,244 @@ function findChildElementOptional(
413
809
  );
414
810
  }
415
811
 
812
/**
 * Finds the first descendant element (in document order, depth-first) whose
 * local name equals `childLocalName`. The starting node itself is not tested.
 *
 * @param node - Element whose subtree is searched.
 * @param childLocalName - Local (prefix-less) element name to look for.
 * @returns The matching element, or `undefined` when there is none.
 */
function findFirstDescendant(
  node: XmlElementNode,
  childLocalName: string,
): XmlElementNode | undefined {
  // Explicit stack; children are pushed reversed so they pop in document order.
  const pending: XmlNode[] = [...node.children].reverse();

  while (pending.length > 0) {
    const candidate = pending.pop();
    if (!candidate || candidate.type !== "element") {
      continue;
    }
    if (localName(candidate.name) === childLocalName) {
      return candidate;
    }
    pending.push(...[...candidate.children].reverse());
  }

  return undefined;
}
828
+
416
829
  function localName(name: string): string {
417
830
  const separatorIndex = name.indexOf(":");
418
831
  return separatorIndex >= 0 ? name.slice(separatorIndex + 1) : name;
419
832
  }
420
833
 
834
+ // ---- Simple secondary-story table support ----
835
+
836
/**
 * Local names of revision-bearing or structurally risky elements that
 * disqualify a secondary-story table from supported-roundtrip.
 *
 * Besides run/paragraph-level revision wrappers and property-change records,
 * this includes the table-specific tracked-change elements (`cellIns`,
 * `cellDel`, `cellMerge`, `tblGridChange`, `tblPrExChange`).
 */
const RISKY_TABLE_ELEMENT_NAMES = new Set<string>([
  // Inserted / deleted / moved content
  "ins",
  "del",
  "moveFrom",
  "moveTo",
  // Property-change records
  "tblPrChange",
  "tblPrExChange",
  "tblGridChange",
  "trPrChange",
  "tcPrChange",
  "rPrChange",
  "pPrChange",
  "sectPrChange",
  // Tracked cell-level structure changes
  "cellIns",
  "cellDel",
  "cellMerge",
  // Structured wrappers
  "sdt",
  "customXml",
]);
854
+
855
/**
 * Reports whether a table element is free of risky content, i.e. whether
 * `containsRiskyElement` finds nothing inside it.
 */
function isSimpleSecondaryStoryTable(tblElement: XmlElementNode): boolean {
  const hasRiskyContent = containsRiskyElement(tblElement);
  return hasRiskyContent === false;
}
858
+
859
/**
 * Recursively checks whether any descendant of `element` makes the
 * containing table unsafe for supported-roundtrip.
 *
 * A descendant is risky when it is:
 * - a `fldSimple` / `instrText` whose instruction classifies into a field
 *   family that is not considered safe,
 * - an element listed in `RISKY_TABLE_ELEMENT_NAMES`,
 * - a nested `tbl`.
 *
 * `fldChar` elements are ignored. The content of a safe `fldSimple` is still
 * scanned, because a simple field can wrap runs that carry revisions or other
 * risky markup.
 *
 * @param element - Element whose descendants are inspected (the element
 *   itself is not tested).
 * @returns `true` as soon as a risky descendant is found, otherwise `false`.
 */
function containsRiskyElement(element: XmlElementNode): boolean {
  for (const child of element.children) {
    if (child.type !== "element") {
      continue;
    }
    const name = localName(child.name);

    if (name === "fldSimple" || name === "instrText") {
      const instruction =
        child.attributes["w:instr"] ??
        child.attributes.instr ??
        extractTextContent(child);
      const classification = classifyFieldInstruction(instruction);
      if (!isSafeSecondaryStoryFieldFamily(classification.family)) {
        return true;
      }
      // A safe field family does not vouch for what the field wraps.
      if (name === "fldSimple" && containsRiskyElement(child)) {
        return true;
      }
      continue;
    }

    if (name === "fldChar") {
      continue;
    }

    // Nested tables remain risky, as do all listed revision/wrapper elements.
    if (name === "tbl" || RISKY_TABLE_ELEMENT_NAMES.has(name)) {
      return true;
    }

    if (containsRiskyElement(child)) {
      return true;
    }
  }
  return false;
}
892
+
893
/**
 * Reports whether a field family is one of the families accepted inside a
 * simple secondary-story table. The comparison is case-sensitive.
 */
function isSafeSecondaryStoryFieldFamily(family: string): boolean {
  switch (family) {
    case "REF":
    case "PAGEREF":
    case "NOTEREF":
    case "TOC":
    case "PAGE":
    case "NUMPAGES":
      return true;
    default:
      return false;
  }
}
903
+
904
/**
 * Builds a table node from a `tbl` element.
 *
 * `tblPr` is kept as serialized XML and also supplies the style id,
 * `tblGrid` supplies the column widths, and every `tr` child becomes a row.
 * Other children are ignored.
 */
function parseSimpleTableElement(tblElement: XmlElementNode): TableNode {
  const rows: TableRowNode[] = [];
  let gridColumns: number[] = [];
  let styleId: string | undefined;
  let propertiesXml: string | undefined;

  for (const part of tblElement.children) {
    if (part.type !== "element") continue;

    switch (localName(part.name)) {
      case "tblPr": {
        propertiesXml = serializeElementToXml(part);
        const styleRef = findChildElementOptional(part, "tblStyle");
        styleId = styleRef?.attributes["w:val"] ?? styleRef?.attributes.val;
        break;
      }
      case "tblGrid":
        gridColumns = readGridColumns(part);
        break;
      case "tr":
        rows.push(parseSimpleTableRow(part));
        break;
      default:
        break;
    }
  }

  // Optional members are emitted only when they carry a non-empty value.
  const optionalStyle = styleId ? { styleId } : {};
  const optionalProperties = propertiesXml ? { propertiesXml } : {};

  return {
    type: "table",
    ...optionalStyle,
    ...optionalProperties,
    gridColumns,
    rows,
  };
}
933
+
934
/**
 * Collects the widths of the `gridCol` children of a `tblGrid` element.
 * A missing or unparsable width is recorded as 0.
 */
function readGridColumns(tblGrid: XmlElementNode): number[] {
  return tblGrid.children.flatMap((entry): number[] => {
    if (entry.type !== "element" || localName(entry.name) !== "gridCol") {
      return [];
    }
    const rawWidth = entry.attributes["w:w"] ?? entry.attributes.w ?? "0";
    const width = Number.parseInt(rawWidth, 10);
    return [width || 0];
  });
}
945
+
946
/**
 * Builds a table-row node from a `tr` element.
 * `trPr` is kept as serialized XML and every `tc` child becomes a cell;
 * other children are ignored.
 */
function parseSimpleTableRow(trElement: XmlElementNode): TableRowNode {
  const cells: TableCellNode[] = [];
  let propertiesXml: string | undefined;

  for (const part of trElement.children) {
    if (part.type !== "element") continue;

    switch (localName(part.name)) {
      case "trPr":
        propertiesXml = serializeElementToXml(part);
        break;
      case "tc":
        cells.push(parseSimpleTableCell(part));
        break;
      default:
        break;
    }
  }

  const optionalProperties = propertiesXml ? { propertiesXml } : {};
  return {
    type: "table_row",
    ...optionalProperties,
    cells,
  };
}
967
+
968
/**
 * Builds a table-cell node from a `tc` element.
 *
 * `tcPr` is kept as serialized XML and is also inspected for `gridSpan` and
 * `vMerge`. Each `p` child becomes a paragraph; other children are ignored.
 * A cell without any paragraph receives a single empty paragraph.
 */
function parseSimpleTableCell(tcElement: XmlElementNode): TableCellNode {
  const blocks: BlockNode[] = [];
  let propertiesXml: string | undefined;
  let gridSpan: number | undefined;
  let verticalMerge: "restart" | "continue" | undefined;

  for (const part of tcElement.children) {
    if (part.type !== "element") continue;
    const partName = localName(part.name);

    if (partName === "p") {
      blocks.push(parseParagraphElement(part));
      continue;
    }
    if (partName !== "tcPr") {
      continue;
    }

    propertiesXml = serializeElementToXml(part);

    const spanElement = findChildElementOptional(part, "gridSpan");
    const spanRaw = spanElement?.attributes["w:val"] ?? spanElement?.attributes.val;
    if (spanRaw) {
      // Unparsable or zero spans collapse to "not set".
      gridSpan = Number.parseInt(spanRaw, 10) || undefined;
    }

    const mergeElement = findChildElementOptional(part, "vMerge");
    if (mergeElement) {
      // A vMerge without a value counts as "continue".
      const mergeRaw =
        mergeElement.attributes["w:val"] ?? mergeElement.attributes.val ?? "continue";
      verticalMerge = mergeRaw === "restart" ? "restart" : "continue";
    }
  }

  const optionalProperties = propertiesXml ? { propertiesXml } : {};
  const optionalSpan = gridSpan ? { gridSpan } : {};
  const optionalMerge = verticalMerge ? { verticalMerge } : {};

  return {
    type: "table_cell",
    ...optionalProperties,
    ...optionalSpan,
    ...optionalMerge,
    children: blocks.length > 0 ? blocks : [{ type: "paragraph", children: [] }],
  };
}
1002
+
1003
/**
 * Turns a parsed element back into an XML string (used to preserve
 * properties and opaque fragments as raw XML).
 *
 * Attribute values and text children are escaped; child elements are
 * serialized recursively. An element whose serialized content is empty is
 * written as a self-closing tag.
 */
function serializeElementToXml(element: XmlElementNode): string {
  let attributeText = "";
  for (const [key, value] of Object.entries(element.attributes)) {
    attributeText += ` ${key}="${escapeXmlAttribute(value)}"`;
  }

  let content = "";
  for (const child of element.children) {
    content += child.type === "text"
      ? escapeXmlText(child.text)
      : serializeElementToXml(child);
  }

  const openTag = `<${element.name}${attributeText}`;
  return content.length === 0
    ? `${openTag}/>`
    : `${openTag}>${content}</${element.name}>`;
}
1023
+
1024
/**
 * Escapes `&`, `"`, `<` and `>` so the text can be placed inside a
 * double-quoted XML attribute value.
 */
function escapeXmlAttribute(text: string): string {
  // Single pass: each special character is replaced exactly once.
  return text.replace(/[&"<>]/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case '"':
        return "&quot;";
      case "<":
        return "&lt;";
      default:
        return "&gt;";
    }
  });
}
1031
+
1032
/**
 * Escapes `&`, `<` and `>` so the text can be placed in XML character data.
 * Quotes are left untouched.
 */
function escapeXmlText(text: string): string {
  // Single pass: each special character is replaced exactly once.
  return text.replace(/[&<>]/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      default:
        return "&gt;";
    }
  });
}
1038
+
421
1039
  // ---- Minimal XML parser (same pattern as parse-numbering.ts) ----
422
1040
 
423
1041
  function parseXml(xml: string): XmlElementNode {
1042
+ currentSourceXml = xml;
424
1043
  const root: XmlElementNode = {
425
1044
  type: "element",
426
1045
  name: "__root__",
427
1046
  attributes: {},
428
1047
  children: [],
1048
+ start: 0,
1049
+ end: xml.length,
429
1050
  };
430
1051
  const stack: XmlElementNode[] = [root];
431
1052
  let cursor = 0;
@@ -449,6 +1070,8 @@ function parseXml(xml: string): XmlElementNode {
449
1070
  stack[stack.length - 1]?.children.push({
450
1071
  type: "text",
451
1072
  text: xml.slice(cursor + 9, textEnd),
1073
+ start: cursor,
1074
+ end: end >= 0 ? end + 3 : xml.length,
452
1075
  });
453
1076
  cursor = end >= 0 ? end + 3 : xml.length;
454
1077
  continue;
@@ -459,7 +1082,7 @@ function parseXml(xml: string): XmlElementNode {
459
1082
  const end = nextTag >= 0 ? nextTag : xml.length;
460
1083
  const text = decodeXmlEntities(xml.slice(cursor, end));
461
1084
  if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
462
- stack[stack.length - 1]?.children.push({ type: "text", text });
1085
+ stack[stack.length - 1]?.children.push({ type: "text", text, start: cursor, end });
463
1086
  }
464
1087
  cursor = end;
465
1088
  continue;
@@ -471,7 +1094,10 @@ function parseXml(xml: string): XmlElementNode {
471
1094
  if (end < 0) {
472
1095
  break;
473
1096
  }
474
- stack.pop();
1097
+ const current = stack.pop();
1098
+ if (current) {
1099
+ current.end = end + 1;
1100
+ }
475
1101
  cursor = end + 1;
476
1102
  continue;
477
1103
  }
@@ -498,6 +1124,8 @@ function parseXml(xml: string): XmlElementNode {
498
1124
  name: tagName,
499
1125
  attributes,
500
1126
  children: [],
1127
+ start: cursor,
1128
+ end: tagEnd + 1,
501
1129
  };
502
1130
 
503
1131
  stack[stack.length - 1]?.children.push(element);