@beyondwork/docx-react-component 1.0.19 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/package.json +1 -1
  2. package/src/api/public-types.ts +336 -0
  3. package/src/api/session-state.ts +2 -0
  4. package/src/core/commands/formatting-commands.ts +1 -1
  5. package/src/core/commands/index.ts +14 -2
  6. package/src/core/search/search-text.ts +28 -0
  7. package/src/core/state/editor-state.ts +3 -0
  8. package/src/index.ts +21 -0
  9. package/src/io/docx-session.ts +363 -17
  10. package/src/io/export/serialize-comments.ts +104 -34
  11. package/src/io/export/serialize-footnotes.ts +198 -1
  12. package/src/io/export/serialize-headers-footers.ts +203 -10
  13. package/src/io/export/serialize-main-document.ts +83 -3
  14. package/src/io/export/split-review-boundaries.ts +181 -19
  15. package/src/io/normalize/normalize-text.ts +82 -8
  16. package/src/io/ooxml/highlight-colors.ts +39 -0
  17. package/src/io/ooxml/parse-comments.ts +85 -19
  18. package/src/io/ooxml/parse-fields.ts +396 -0
  19. package/src/io/ooxml/parse-footnotes.ts +240 -2
  20. package/src/io/ooxml/parse-headers-footers.ts +431 -7
  21. package/src/io/ooxml/parse-inline-media.ts +15 -1
  22. package/src/io/ooxml/parse-main-document.ts +396 -14
  23. package/src/io/ooxml/parse-revisions.ts +317 -38
  24. package/src/legal/bookmarks.ts +44 -0
  25. package/src/legal/cross-references.ts +59 -1
  26. package/src/model/canonical-document.ts +117 -1
  27. package/src/model/snapshot.ts +85 -1
  28. package/src/review/store/revision-store.ts +6 -0
  29. package/src/review/store/revision-types.ts +1 -0
  30. package/src/runtime/document-navigation.ts +52 -13
  31. package/src/runtime/document-runtime.ts +1521 -75
  32. package/src/runtime/read-only-diagnostics-runtime.ts +8 -0
  33. package/src/runtime/session-capabilities.ts +33 -3
  34. package/src/runtime/surface-projection.ts +86 -25
  35. package/src/runtime/table-schema.ts +2 -2
  36. package/src/runtime/view-state.ts +24 -6
  37. package/src/runtime/workflow-markup.ts +349 -0
  38. package/src/ui/WordReviewEditor.tsx +850 -1315
  39. package/src/ui/editor-command-bag.ts +120 -0
  40. package/src/ui/editor-runtime-boundary.ts +1422 -0
  41. package/src/ui/editor-shell-view.tsx +134 -0
  42. package/src/ui/editor-surface-controller.tsx +51 -0
  43. package/src/ui/headless/revision-decoration-model.ts +4 -4
  44. package/src/ui/runtime-snapshot-selectors.ts +197 -0
  45. package/src/ui-tailwind/chrome/tw-alert-banner.tsx +18 -2
  46. package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +129 -0
  47. package/src/ui-tailwind/chrome/tw-layout-panel.tsx +114 -0
  48. package/src/ui-tailwind/chrome/tw-object-context-toolbar.tsx +34 -0
  49. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +27 -2
  50. package/src/ui-tailwind/chrome/tw-table-context-toolbar.tsx +128 -0
  51. package/src/ui-tailwind/editor-surface/perf-probe.ts +86 -14
  52. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +2 -2
  53. package/src/ui-tailwind/editor-surface/pm-decorations.ts +35 -0
  54. package/src/ui-tailwind/editor-surface/pm-position-map.ts +1 -1
  55. package/src/ui-tailwind/editor-surface/pm-schema.ts +139 -8
  56. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +98 -48
  57. package/src/ui-tailwind/editor-surface/surface-build-keys.ts +51 -0
  58. package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +7 -1
  59. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +174 -48
  60. package/src/ui-tailwind/page-chrome-model.ts +27 -0
  61. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +7 -7
  62. package/src/ui-tailwind/review/tw-health-panel.tsx +31 -2
  63. package/src/ui-tailwind/review/tw-review-rail.tsx +3 -3
  64. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +15 -15
  65. package/src/ui-tailwind/theme/editor-theme.css +4 -0
  66. package/src/ui-tailwind/toolbar/tw-toolbar.tsx +543 -5
  67. package/src/ui-tailwind/tw-review-workspace.tsx +316 -19
  68. package/src/validation/compatibility-engine.ts +27 -4
  69. package/src/validation/compatibility-report.ts +1 -0
  70. package/src/validation/docx-comment-proof.ts +220 -0
@@ -3,12 +3,18 @@ import type {
3
3
  FootnoteRefNode,
4
4
  HeaderFooterVariant,
5
5
  InlineNode,
6
+ ParagraphIndentation,
6
7
  ParagraphNode,
8
+ ParagraphSpacing,
9
+ TabStop,
7
10
  TableCellNode,
8
11
  TableNode,
9
12
  TableRowNode,
10
13
  TextMark,
11
14
  } from "../../model/canonical-document.ts";
15
+ import { resolveHighlightColor } from "./highlight-colors.ts";
16
+ import { classifyFieldInstruction } from "./parse-fields.ts";
17
+ import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
12
18
 
13
19
  // ---- Public types ----
14
20
 
@@ -30,15 +36,21 @@ interface XmlElementNode {
30
36
  name: string;
31
37
  attributes: Record<string, string>;
32
38
  children: XmlNode[];
39
+ start: number;
40
+ end: number;
33
41
  }
34
42
 
35
43
  interface XmlTextNode {
36
44
  type: "text";
37
45
  text: string;
46
+ start: number;
47
+ end: number;
38
48
  }
39
49
 
40
50
  type XmlNode = XmlElementNode | XmlTextNode;
41
51
 
52
+ let currentSourceXml = "";
53
+
42
54
  // ---- Public API ----
43
55
 
44
56
  /**
@@ -137,8 +149,9 @@ function extractSectPrRefs(
137
149
 
138
150
  if (relationshipId) {
139
151
  // Avoid duplicates (multiple sectPr may reference same header)
152
+ const dedupeKey = `${kind}:${variant}:${relationshipId}`;
140
153
  const alreadyAdded = refs.some(
141
- (ref) => ref.relationshipId === relationshipId && ref.kind === kind,
154
+ (ref) => `${ref.kind}:${ref.variant}:${ref.relationshipId}` === dedupeKey,
142
155
  );
143
156
  if (!alreadyAdded) {
144
157
  refs.push({ variant, relationshipId, kind, sectionIndex });
@@ -189,6 +202,7 @@ function parseHdrFtrXml(
189
202
  type: "opaque_block",
190
203
  fragmentId: "fragment:hdrftr-tbl",
191
204
  warningId: "warning:hdrftr-opaque-table",
205
+ rawXml: serializeElementToXml(child),
192
206
  });
193
207
  }
194
208
  } else {
@@ -197,6 +211,7 @@ function parseHdrFtrXml(
197
211
  type: "opaque_block",
198
212
  fragmentId: "fragment:hdrftr-opaque",
199
213
  warningId: "warning:hdrftr-opaque-block",
214
+ rawXml: serializeElementToXml(child),
200
215
  });
201
216
  }
202
217
  }
@@ -207,7 +222,15 @@ function parseHdrFtrXml(
207
222
  function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
208
223
  let styleId: string | undefined;
209
224
  let alignment: ParagraphNode["alignment"];
225
+ let spacing: ParagraphNode["spacing"];
226
+ let indentation: ParagraphNode["indentation"];
227
+ let tabStops: ParagraphNode["tabStops"];
210
228
  const children: InlineNode[] = [];
229
+ let activeComplexField: {
230
+ instruction: string;
231
+ children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
232
+ mode: "instruction" | "result";
233
+ } | null = null;
211
234
 
212
235
  for (const child of pElement.children) {
213
236
  if (child.type !== "element") {
@@ -224,25 +247,121 @@ function parseParagraphElement(pElement: XmlElementNode): ParagraphNode {
224
247
  if (jcVal === "left" || jcVal === "center" || jcVal === "right" || jcVal === "both" || jcVal === "distribute") {
225
248
  alignment = jcVal;
226
249
  }
250
+ spacing = readParagraphSpacing(child);
251
+ indentation = readParagraphIndentation(child);
252
+ tabStops = readParagraphTabStops(child);
227
253
  } else if (name === "r") {
228
- children.push(...parseRunElement(child));
254
+ activeComplexField = appendRunNodes(child, children, activeComplexField);
229
255
  } else if (name === "hyperlink") {
230
256
  children.push(parseHyperlinkElement(child));
231
257
  } else if (name === "bookmarkStart" || name === "bookmarkEnd") {
232
258
  children.push(parseBookmarkElement(child));
233
259
  } else if (name === "fldSimple") {
260
+ if (activeComplexField && activeComplexField.instruction.trim().length > 0) {
261
+ children.push({
262
+ type: "field",
263
+ fieldType: "complex",
264
+ instruction: activeComplexField.instruction,
265
+ children: activeComplexField.children,
266
+ });
267
+ activeComplexField = null;
268
+ }
234
269
  pushFieldNode(children, child, "simple");
235
270
  }
236
271
  }
237
272
 
273
+ if (activeComplexField && activeComplexField.instruction.trim().length > 0) {
274
+ children.push({
275
+ type: "field",
276
+ fieldType: "complex",
277
+ instruction: activeComplexField.instruction,
278
+ children: activeComplexField.children,
279
+ });
280
+ }
281
+
238
282
  return {
239
283
  type: "paragraph",
240
284
  ...(styleId ? { styleId } : {}),
241
285
  ...(alignment ? { alignment } : {}),
286
+ ...(spacing ? { spacing } : {}),
287
+ ...(indentation ? { indentation } : {}),
288
+ ...(tabStops && tabStops.length > 0 ? { tabStops } : {}),
242
289
  children,
243
290
  };
244
291
  }
245
292
 
293
+ function appendRunNodes(
294
+ rElement: XmlElementNode,
295
+ nodes: InlineNode[],
296
+ activeComplexField: {
297
+ instruction: string;
298
+ children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
299
+ mode: "instruction" | "result";
300
+ } | null,
301
+ ): {
302
+ instruction: string;
303
+ children: Array<Extract<InlineNode, { type: "text" | "hard_break" | "tab" }>>;
304
+ mode: "instruction" | "result";
305
+ } | null {
306
+ const marks: TextMark[] = parseRunProperties(rElement);
307
+
308
+ for (const child of rElement.children) {
309
+ if (child.type !== "element") {
310
+ continue;
311
+ }
312
+
313
+ const name = localName(child.name);
314
+ if (name === "fldChar") {
315
+ const fldType = child.attributes["w:fldCharType"] ?? child.attributes.fldCharType;
316
+ if (fldType === "begin") {
317
+ activeComplexField = { instruction: "", children: [], mode: "instruction" };
318
+ } else if (fldType === "separate" && activeComplexField) {
319
+ activeComplexField.mode = "result";
320
+ } else if (fldType === "end" && activeComplexField) {
321
+ if (activeComplexField.instruction.trim().length > 0) {
322
+ nodes.push({
323
+ type: "field",
324
+ fieldType: "complex",
325
+ instruction: activeComplexField.instruction,
326
+ children: activeComplexField.children,
327
+ });
328
+ }
329
+ activeComplexField = null;
330
+ }
331
+ continue;
332
+ }
333
+
334
+ if (name === "instrText") {
335
+ if (activeComplexField) {
336
+ activeComplexField.instruction += extractTextContent(child);
337
+ } else {
338
+ pushFieldNode(nodes, child, "complex");
339
+ }
340
+ continue;
341
+ }
342
+
343
+ const inlineNode = parseRunChildNode(child, marks);
344
+ if (!inlineNode) {
345
+ continue;
346
+ }
347
+
348
+ if (activeComplexField?.mode === "result") {
349
+ if (
350
+ inlineNode.type === "text" ||
351
+ inlineNode.type === "hard_break" ||
352
+ inlineNode.type === "tab"
353
+ ) {
354
+ activeComplexField.children.push(inlineNode);
355
+ }
356
+ continue;
357
+ }
358
+
359
+ nodes.push(inlineNode);
360
+ }
361
+
362
+ return activeComplexField;
363
+ }
364
+
246
365
  function parseRunElement(rElement: XmlElementNode): InlineNode[] {
247
366
  const nodes: InlineNode[] = [];
248
367
  const marks: TextMark[] = parseRunProperties(rElement);
@@ -293,12 +412,136 @@ function parseRunElement(rElement: XmlElementNode): InlineNode[] {
293
412
  nodes.push(parseBookmarkElement(child));
294
413
  } else if (name === "instrText") {
295
414
  pushFieldNode(nodes, child, "complex");
415
+ } else if (name === "drawing") {
416
+ const drawingXml = currentSourceXml.slice(child.start, child.end);
417
+ const shapeResult = parseShapeXml(drawingXml);
418
+ if (shapeResult) {
419
+ nodes.push(shapeResult);
420
+ }
421
+ } else if (name === "pict") {
422
+ const pictXml = currentSourceXml.slice(child.start, child.end);
423
+ const vmlResult = parseVmlXml(pictXml);
424
+ if (vmlResult) {
425
+ nodes.push(vmlResult);
426
+ }
427
+ } else if (name === "AlternateContent") {
428
+ const drawingNode = findFirstDescendant(child, "drawing");
429
+ if (drawingNode) {
430
+ const drawingXml = currentSourceXml.slice(drawingNode.start, drawingNode.end);
431
+ const shapeResult = parseShapeXml(drawingXml);
432
+ if (shapeResult) {
433
+ nodes.push({
434
+ ...shapeResult,
435
+ rawXml: currentSourceXml.slice(child.start, child.end),
436
+ });
437
+ continue;
438
+ }
439
+ }
440
+ const pictNode = findFirstDescendant(child, "pict");
441
+ if (pictNode) {
442
+ const pictXml = currentSourceXml.slice(pictNode.start, pictNode.end);
443
+ const vmlResult = parseVmlXml(pictXml);
444
+ if (vmlResult) {
445
+ nodes.push({
446
+ ...vmlResult,
447
+ rawXml: currentSourceXml.slice(child.start, child.end),
448
+ });
449
+ }
450
+ }
296
451
  }
297
452
  }
298
453
 
299
454
  return nodes;
300
455
  }
301
456
 
457
+ function parseRunChildNode(
458
+ child: XmlElementNode,
459
+ marks: TextMark[],
460
+ ): InlineNode | null {
461
+ const name = localName(child.name);
462
+
463
+ if (name === "t") {
464
+ const text = extractTextContent(child);
465
+ if (text.length > 0) {
466
+ return {
467
+ type: "text",
468
+ text,
469
+ ...(marks.length > 0 ? { marks } : {}),
470
+ };
471
+ }
472
+ return null;
473
+ }
474
+ if (name === "br") {
475
+ return { type: "hard_break" };
476
+ }
477
+ if (name === "tab") {
478
+ return { type: "tab" };
479
+ }
480
+ if (name === "footnoteReference") {
481
+ const noteId =
482
+ child.attributes["w:id"] ?? child.attributes.id ?? "";
483
+ if (noteId) {
484
+ const ref: FootnoteRefNode = {
485
+ type: "footnote_ref",
486
+ noteId,
487
+ noteKind: "footnote",
488
+ };
489
+ return ref;
490
+ }
491
+ return null;
492
+ }
493
+ if (name === "endnoteReference") {
494
+ const noteId =
495
+ child.attributes["w:id"] ?? child.attributes.id ?? "";
496
+ if (noteId) {
497
+ const ref: FootnoteRefNode = {
498
+ type: "footnote_ref",
499
+ noteId,
500
+ noteKind: "endnote",
501
+ };
502
+ return ref;
503
+ }
504
+ return null;
505
+ }
506
+ if (name === "bookmarkStart" || name === "bookmarkEnd") {
507
+ return parseBookmarkElement(child);
508
+ }
509
+ if (name === "drawing") {
510
+ const drawingXml = currentSourceXml.slice(child.start, child.end);
511
+ return parseShapeXml(drawingXml);
512
+ }
513
+ if (name === "pict") {
514
+ const pictXml = currentSourceXml.slice(child.start, child.end);
515
+ return parseVmlXml(pictXml);
516
+ }
517
+ if (name === "AlternateContent") {
518
+ const drawingNode = findFirstDescendant(child, "drawing");
519
+ if (drawingNode) {
520
+ const drawingXml = currentSourceXml.slice(drawingNode.start, drawingNode.end);
521
+ const shapeResult = parseShapeXml(drawingXml);
522
+ if (shapeResult) {
523
+ return {
524
+ ...shapeResult,
525
+ rawXml: currentSourceXml.slice(child.start, child.end),
526
+ };
527
+ }
528
+ }
529
+ const pictNode = findFirstDescendant(child, "pict");
530
+ if (pictNode) {
531
+ const pictXml = currentSourceXml.slice(pictNode.start, pictNode.end);
532
+ const vmlResult = parseVmlXml(pictXml);
533
+ if (vmlResult) {
534
+ return {
535
+ ...vmlResult,
536
+ rawXml: currentSourceXml.slice(child.start, child.end),
537
+ };
538
+ }
539
+ }
540
+ }
541
+
542
+ return null;
543
+ }
544
+
302
545
  function parseHyperlinkElement(element: XmlElementNode): Extract<InlineNode, { type: "hyperlink" }> {
303
546
  const href = element.attributes["w:anchor"]
304
547
  ? `#${element.attributes["w:anchor"]}`
@@ -408,12 +651,144 @@ function parseRunProperties(rElement: XmlElementNode): TextMark[] {
408
651
  marks.push({ type: "doubleStrikethrough" });
409
652
  }
410
653
  break;
654
+ case "rFonts": {
655
+ const family =
656
+ child.attributes["w:ascii"] ??
657
+ child.attributes["w:hAnsi"] ??
658
+ child.attributes.ascii ??
659
+ child.attributes.hAnsi;
660
+ if (family) {
661
+ marks.push({ type: "fontFamily", val: family });
662
+ }
663
+ break;
664
+ }
665
+ case "sz": {
666
+ const szVal = child.attributes["w:val"] ?? child.attributes.val;
667
+ if (szVal) {
668
+ const size = Number.parseInt(szVal, 10);
669
+ if (Number.isFinite(size) && size > 0) {
670
+ marks.push({ type: "fontSize", val: size });
671
+ }
672
+ }
673
+ break;
674
+ }
675
+ case "color": {
676
+ const colorVal = child.attributes["w:val"] ?? child.attributes.val;
677
+ if (colorVal && colorVal !== "auto") {
678
+ marks.push({ type: "textColor", color: colorVal });
679
+ }
680
+ break;
681
+ }
682
+ case "shd": {
683
+ const fill = child.attributes["w:fill"] ?? child.attributes.fill;
684
+ if (fill && fill !== "auto") {
685
+ marks.push({ type: "backgroundColor", color: fill });
686
+ }
687
+ break;
688
+ }
689
+ case "highlight": {
690
+ const resolvedHighlight = resolveHighlightColor(
691
+ child.attributes["w:val"] ?? child.attributes.val,
692
+ );
693
+ if (resolvedHighlight) {
694
+ marks.push({
695
+ type: "highlight",
696
+ color: resolvedHighlight.color,
697
+ val: resolvedHighlight.val,
698
+ });
699
+ }
700
+ break;
701
+ }
702
+ case "smallCaps":
703
+ if (val !== "0" && val !== "false") {
704
+ marks.push({ type: "smallCaps" });
705
+ }
706
+ break;
707
+ case "caps":
708
+ if (val !== "0" && val !== "false") {
709
+ marks.push({ type: "allCaps" });
710
+ }
711
+ break;
411
712
  }
412
713
  }
413
714
 
414
715
  return marks;
415
716
  }
416
717
 
718
+ function readParagraphSpacing(pPr: XmlElementNode): ParagraphSpacing | undefined {
719
+ const spacingNode = findChildElementOptional(pPr, "spacing");
720
+ if (!spacingNode) return undefined;
721
+ const result: ParagraphSpacing = {};
722
+ const before = spacingNode.attributes["w:before"] ?? spacingNode.attributes.before;
723
+ if (before) result.before = Number.parseInt(before, 10);
724
+ const after = spacingNode.attributes["w:after"] ?? spacingNode.attributes.after;
725
+ if (after) result.after = Number.parseInt(after, 10);
726
+ const line = spacingNode.attributes["w:line"] ?? spacingNode.attributes.line;
727
+ if (line) result.line = Number.parseInt(line, 10);
728
+ const lineRule = spacingNode.attributes["w:lineRule"] ?? spacingNode.attributes.lineRule;
729
+ if (lineRule === "auto" || lineRule === "exact" || lineRule === "atLeast") {
730
+ result.lineRule = lineRule;
731
+ }
732
+ return Object.keys(result).length > 0 ? result : undefined;
733
+ }
734
+
735
+ function readParagraphIndentation(pPr: XmlElementNode): ParagraphIndentation | undefined {
736
+ const indNode = findChildElementOptional(pPr, "ind");
737
+ if (!indNode) return undefined;
738
+ const result: ParagraphIndentation = {};
739
+ const left = indNode.attributes["w:left"] ?? indNode.attributes.left;
740
+ if (left) result.left = Number.parseInt(left, 10);
741
+ const right = indNode.attributes["w:right"] ?? indNode.attributes.right;
742
+ if (right) result.right = Number.parseInt(right, 10);
743
+ const firstLine = indNode.attributes["w:firstLine"] ?? indNode.attributes.firstLine;
744
+ if (firstLine) result.firstLine = Number.parseInt(firstLine, 10);
745
+ const hanging = indNode.attributes["w:hanging"] ?? indNode.attributes.hanging;
746
+ if (hanging) result.hanging = Number.parseInt(hanging, 10);
747
+ return Object.keys(result).length > 0 ? result : undefined;
748
+ }
749
+
750
+ function readParagraphTabStops(pPr: XmlElementNode): TabStop[] | undefined {
751
+ const tabsNode = findChildElementOptional(pPr, "tabs");
752
+ if (!tabsNode) return undefined;
753
+
754
+ const tabStops: TabStop[] = [];
755
+ for (const child of tabsNode.children) {
756
+ if (child.type !== "element" || localName(child.name) !== "tab") continue;
757
+ const pos = child.attributes["w:pos"] ?? child.attributes.pos;
758
+ const val = (child.attributes["w:val"] ?? child.attributes.val ?? "left").toLowerCase();
759
+ const leader = (child.attributes["w:leader"] ?? child.attributes.leader ?? "none").toLowerCase();
760
+
761
+ if (pos === undefined) continue;
762
+ const position = Number.parseInt(pos, 10);
763
+ if (!Number.isFinite(position)) continue;
764
+
765
+ const align = (["left", "center", "right", "decimal", "bar", "clear"] as const).includes(
766
+ val as "left" | "center" | "right" | "decimal" | "bar" | "clear",
767
+ )
768
+ ? (val as TabStop["align"])
769
+ : "left";
770
+
771
+ const leaderValue =
772
+ leader === "none" ||
773
+ leader === "dot" ||
774
+ leader === "hyphen" ||
775
+ leader === "underscore" ||
776
+ leader === "heavy"
777
+ ? (leader as Exclude<TabStop["leader"], "middleDot">)
778
+ : leader === "middledot"
779
+ ? "middleDot"
780
+ : undefined;
781
+
782
+ tabStops.push({
783
+ position,
784
+ align,
785
+ ...(leaderValue && leaderValue !== "none" ? { leader: leaderValue } : {}),
786
+ });
787
+ }
788
+
789
+ return tabStops.length > 0 ? tabStops : undefined;
790
+ }
791
+
417
792
  function extractTextContent(tElement: XmlElementNode): string {
418
793
  let text = "";
419
794
  for (const child of tElement.children) {
@@ -434,6 +809,23 @@ function findChildElementOptional(
434
809
  );
435
810
  }
436
811
 
812
+ function findFirstDescendant(
813
+ node: XmlElementNode,
814
+ childLocalName: string,
815
+ ): XmlElementNode | undefined {
816
+ for (const child of node.children) {
817
+ if (child.type !== "element") continue;
818
+ if (localName(child.name) === childLocalName) {
819
+ return child;
820
+ }
821
+ const nested = findFirstDescendant(child, childLocalName);
822
+ if (nested) {
823
+ return nested;
824
+ }
825
+ }
826
+ return undefined;
827
+ }
828
+
437
829
  function localName(name: string): string {
438
830
  const separatorIndex = name.indexOf(":");
439
831
  return separatorIndex >= 0 ? name.slice(separatorIndex + 1) : name;
@@ -456,9 +848,6 @@ const RISKY_TABLE_ELEMENT_NAMES = new Set([
456
848
  "rPrChange",
457
849
  "pPrChange",
458
850
  "sectPrChange",
459
- "fldSimple",
460
- "fldChar",
461
- "instrText",
462
851
  "sdt",
463
852
  "customXml",
464
853
  ]);
@@ -473,9 +862,23 @@ function containsRiskyElement(element: XmlElementNode): boolean {
473
862
  continue;
474
863
  }
475
864
  const name = localName(child.name);
865
+ if (name === "fldSimple" || name === "instrText") {
866
+ const instruction =
867
+ child.attributes["w:instr"] ??
868
+ child.attributes.instr ??
869
+ extractTextContent(child);
870
+ const classification = classifyFieldInstruction(instruction);
871
+ if (!isSafeSecondaryStoryFieldFamily(classification.family)) {
872
+ return true;
873
+ }
874
+ continue;
875
+ }
476
876
  if (RISKY_TABLE_ELEMENT_NAMES.has(name)) {
477
877
  return true;
478
878
  }
879
+ if (name === "fldChar") {
880
+ continue;
881
+ }
479
882
  // Nested tables remain risky
480
883
  if (name === "tbl") {
481
884
  return true;
@@ -487,6 +890,17 @@ function containsRiskyElement(element: XmlElementNode): boolean {
487
890
  return false;
488
891
  }
489
892
 
893
+ function isSafeSecondaryStoryFieldFamily(family: string): boolean {
894
+ return (
895
+ family === "REF" ||
896
+ family === "PAGEREF" ||
897
+ family === "NOTEREF" ||
898
+ family === "TOC" ||
899
+ family === "PAGE" ||
900
+ family === "NUMPAGES"
901
+ );
902
+ }
903
+
490
904
  function parseSimpleTableElement(tblElement: XmlElementNode): TableNode {
491
905
  let gridColumns: number[] = [];
492
906
  const rows: TableRowNode[] = [];
@@ -625,11 +1039,14 @@ function escapeXmlText(text: string): string {
625
1039
  // ---- Minimal XML parser (same pattern as parse-numbering.ts) ----
626
1040
 
627
1041
  function parseXml(xml: string): XmlElementNode {
1042
+ currentSourceXml = xml;
628
1043
  const root: XmlElementNode = {
629
1044
  type: "element",
630
1045
  name: "__root__",
631
1046
  attributes: {},
632
1047
  children: [],
1048
+ start: 0,
1049
+ end: xml.length,
633
1050
  };
634
1051
  const stack: XmlElementNode[] = [root];
635
1052
  let cursor = 0;
@@ -653,6 +1070,8 @@ function parseXml(xml: string): XmlElementNode {
653
1070
  stack[stack.length - 1]?.children.push({
654
1071
  type: "text",
655
1072
  text: xml.slice(cursor + 9, textEnd),
1073
+ start: cursor,
1074
+ end: end >= 0 ? end + 3 : xml.length,
656
1075
  });
657
1076
  cursor = end >= 0 ? end + 3 : xml.length;
658
1077
  continue;
@@ -663,7 +1082,7 @@ function parseXml(xml: string): XmlElementNode {
663
1082
  const end = nextTag >= 0 ? nextTag : xml.length;
664
1083
  const text = decodeXmlEntities(xml.slice(cursor, end));
665
1084
  if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
666
- stack[stack.length - 1]?.children.push({ type: "text", text });
1085
+ stack[stack.length - 1]?.children.push({ type: "text", text, start: cursor, end });
667
1086
  }
668
1087
  cursor = end;
669
1088
  continue;
@@ -675,7 +1094,10 @@ function parseXml(xml: string): XmlElementNode {
675
1094
  if (end < 0) {
676
1095
  break;
677
1096
  }
678
- stack.pop();
1097
+ const current = stack.pop();
1098
+ if (current) {
1099
+ current.end = end + 1;
1100
+ }
679
1101
  cursor = end + 1;
680
1102
  continue;
681
1103
  }
@@ -702,6 +1124,8 @@ function parseXml(xml: string): XmlElementNode {
702
1124
  name: tagName,
703
1125
  attributes,
704
1126
  children: [],
1127
+ start: cursor,
1128
+ end: tagEnd + 1,
705
1129
  };
706
1130
 
707
1131
  stack[stack.length - 1]?.children.push(element);
@@ -88,7 +88,7 @@ export function parseInlineMediaXml(
88
88
  const floating = anchor ? readFloatingProperties(anchor) : undefined;
89
89
 
90
90
  // Read extent dimensions (wp:extent cx/cy in EMUs)
91
- const extent = findFirstDescendant(container, "extent");
91
+ const extent = findDrawingExtent(container);
92
92
  const widthEmu = extent ? readEmuAttribute(extent, "cx") : undefined;
93
93
  const heightEmu = extent ? readEmuAttribute(extent, "cy") : undefined;
94
94
 
@@ -110,6 +110,20 @@ export function parseInlineMediaXml(
110
110
  return media;
111
111
  }
112
112
 
113
+ function findDrawingExtent(node: XmlElementNode): XmlElementNode | undefined {
114
+ const wordProcessingExtent = findFirstDescendant(node, "extent");
115
+ if (wordProcessingExtent) {
116
+ return wordProcessingExtent;
117
+ }
118
+
119
+ const transform = findFirstDescendant(node, "xfrm");
120
+ if (!transform) {
121
+ return undefined;
122
+ }
123
+
124
+ return findFirstDescendant(transform, "ext");
125
+ }
126
+
113
127
  function readFloatingProperties(
114
128
  node: XmlElementNode,
115
129
  ): NonNullable<ParsedInlineMedia["floating"]> | undefined {