@beyondwork/docx-react-component 1.0.56 → 1.0.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/api/public-types.ts +330 -0
  4. package/src/compare/diff-engine.ts +3 -0
  5. package/src/core/commands/formatting-commands.ts +1 -0
  6. package/src/core/commands/index.ts +17 -11
  7. package/src/core/selection/mapping.ts +18 -1
  8. package/src/core/selection/review-anchors.ts +29 -18
  9. package/src/io/chart-preview-resolver.ts +175 -41
  10. package/src/io/docx-session.ts +57 -2
  11. package/src/io/export/serialize-main-document.ts +82 -0
  12. package/src/io/export/serialize-styles.ts +61 -3
  13. package/src/io/export/table-properties-xml.ts +19 -4
  14. package/src/io/normalize/normalize-text.ts +33 -0
  15. package/src/io/ooxml/parse-anchor.ts +182 -0
  16. package/src/io/ooxml/parse-drawing.ts +319 -0
  17. package/src/io/ooxml/parse-fields.ts +115 -2
  18. package/src/io/ooxml/parse-fill.ts +215 -0
  19. package/src/io/ooxml/parse-font-table.ts +190 -0
  20. package/src/io/ooxml/parse-footnotes.ts +52 -1
  21. package/src/io/ooxml/parse-main-document.ts +241 -1
  22. package/src/io/ooxml/parse-numbering.ts +96 -0
  23. package/src/io/ooxml/parse-picture.ts +158 -0
  24. package/src/io/ooxml/parse-settings.ts +34 -0
  25. package/src/io/ooxml/parse-shapes.ts +87 -0
  26. package/src/io/ooxml/parse-solid-fill.ts +11 -0
  27. package/src/io/ooxml/parse-styles.ts +74 -1
  28. package/src/io/ooxml/parse-theme.ts +60 -0
  29. package/src/io/paste/html-clipboard.ts +449 -0
  30. package/src/io/paste/word-clipboard.ts +5 -1
  31. package/src/legal/_document-root.ts +26 -0
  32. package/src/legal/bookmarks.ts +4 -3
  33. package/src/legal/cross-references.ts +3 -2
  34. package/src/legal/defined-terms.ts +2 -1
  35. package/src/legal/signature-blocks.ts +2 -1
  36. package/src/model/canonical-document.ts +421 -3
  37. package/src/runtime/chart/chart-model-store.ts +73 -10
  38. package/src/runtime/document-runtime.ts +760 -41
  39. package/src/runtime/document-search.ts +61 -0
  40. package/src/runtime/edit-ops/index.ts +129 -0
  41. package/src/runtime/event-refresh-hints.ts +7 -0
  42. package/src/runtime/field-resolver.ts +341 -0
  43. package/src/runtime/footnote-resolver.ts +55 -0
  44. package/src/runtime/hyperlink-color-resolver.ts +13 -10
  45. package/src/runtime/object-grab/index.ts +51 -0
  46. package/src/runtime/paragraph-style-resolver.ts +105 -0
  47. package/src/runtime/query-scopes.ts +186 -0
  48. package/src/runtime/resolved-numbering-geometry.ts +12 -0
  49. package/src/runtime/scope-resolver.ts +60 -0
  50. package/src/runtime/selection/cursor-ops.ts +186 -15
  51. package/src/runtime/selection/index.ts +17 -1
  52. package/src/runtime/structure-ops/index.ts +77 -0
  53. package/src/runtime/styles-cascade.ts +33 -0
  54. package/src/runtime/surface-projection.ts +192 -12
  55. package/src/runtime/theme-color-resolver.ts +189 -44
  56. package/src/runtime/units.ts +46 -0
  57. package/src/runtime/view-state.ts +13 -2
  58. package/src/ui/WordReviewEditor.tsx +239 -11
  59. package/src/ui/editor-runtime-boundary.ts +97 -1
  60. package/src/ui/editor-shell-view.tsx +1 -1
  61. package/src/ui/runtime-shortcut-dispatch.ts +17 -3
  62. package/src/ui-tailwind/chart/ChartSurface.tsx +36 -10
  63. package/src/ui-tailwind/chart/layout/plot-area.ts +120 -45
  64. package/src/ui-tailwind/chart/render/area.tsx +22 -4
  65. package/src/ui-tailwind/chart/render/bar-column.tsx +37 -11
  66. package/src/ui-tailwind/chart/render/bubble.tsx +6 -2
  67. package/src/ui-tailwind/chart/render/combo.tsx +37 -4
  68. package/src/ui-tailwind/chart/render/line.tsx +28 -5
  69. package/src/ui-tailwind/chart/render/pie.tsx +36 -16
  70. package/src/ui-tailwind/chart/render/progressive-render.ts +8 -1
  71. package/src/ui-tailwind/chart/render/scatter.tsx +9 -4
  72. package/src/ui-tailwind/chrome/avatar-initials.ts +15 -0
  73. package/src/ui-tailwind/chrome/tw-comment-preview.tsx +3 -1
  74. package/src/ui-tailwind/chrome/tw-context-menu.tsx +14 -0
  75. package/src/ui-tailwind/chrome/tw-selection-tool-host.tsx +3 -2
  76. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +30 -11
  77. package/src/ui-tailwind/chrome/tw-shortcut-hint.tsx +15 -2
  78. package/src/ui-tailwind/chrome/tw-suggestion-card.tsx +1 -1
  79. package/src/ui-tailwind/chrome/tw-table-context-toolbar.tsx +24 -7
  80. package/src/ui-tailwind/chrome/tw-table-grip-layer.tsx +31 -12
  81. package/src/ui-tailwind/chrome-overlay/page-border-resolver.ts +211 -0
  82. package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +24 -0
  83. package/src/ui-tailwind/chrome-overlay/tw-comment-balloon-layer.tsx +74 -0
  84. package/src/ui-tailwind/chrome-overlay/tw-locked-block-layer.tsx +65 -0
  85. package/src/ui-tailwind/chrome-overlay/tw-object-selection-overlay.tsx +157 -0
  86. package/src/ui-tailwind/chrome-overlay/tw-page-border-overlay.tsx +233 -0
  87. package/src/ui-tailwind/chrome-overlay/tw-page-stack-overlay-layer.tsx +135 -13
  88. package/src/ui-tailwind/chrome-overlay/tw-revision-margin-bar-layer.tsx +51 -0
  89. package/src/ui-tailwind/chrome-overlay/tw-scope-card-layer.tsx +12 -4
  90. package/src/ui-tailwind/chrome-overlay/tw-scope-card.tsx +32 -12
  91. package/src/ui-tailwind/chrome-overlay/tw-toc-outline-sidebar.tsx +133 -0
  92. package/src/ui-tailwind/editor-surface/chart-node-view.tsx +49 -10
  93. package/src/ui-tailwind/editor-surface/float-wrap-resolver.ts +119 -0
  94. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +236 -9
  95. package/src/ui-tailwind/editor-surface/pm-schema.ts +214 -11
  96. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +32 -2
  97. package/src/ui-tailwind/editor-surface/shape-renderer.ts +206 -0
  98. package/src/ui-tailwind/editor-surface/surface-layer.ts +66 -0
  99. package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +29 -0
  100. package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +7 -1
  101. package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +22 -6
  102. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +10 -16
  103. package/src/ui-tailwind/review/tw-health-panel.tsx +0 -25
  104. package/src/ui-tailwind/review/tw-rail-card.tsx +38 -17
  105. package/src/ui-tailwind/review/tw-review-rail.tsx +2 -2
  106. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +5 -12
  107. package/src/ui-tailwind/review/tw-workflow-tab.tsx +2 -2
  108. package/src/ui-tailwind/theme/editor-theme.css +1 -0
  109. package/src/ui-tailwind/theme/tokens.css +6 -0
  110. package/src/ui-tailwind/theme/tokens.ts +10 -0
  111. package/src/ui-tailwind/tw-review-workspace.tsx +23 -0
  112. package/src/validation/compatibility-engine.ts +2 -0
  113. package/src/validation/docx-comment-proof.ts +12 -3
@@ -1,5 +1,7 @@
1
1
  import { twip } from "./twip.ts";
2
2
  import { escapeXmlAttribute } from "./escape-xml-attribute.ts";
3
+ import { emitPropertyGrabBag } from "../ooxml/property-grab-bag.ts";
4
+ import type { UnknownPropertyChild } from "../../model/canonical-document.ts";
3
5
 
4
6
  interface TableWidthLike {
5
7
  value: number;
@@ -66,6 +68,7 @@ interface TableFloatingPropertiesLike {
66
68
 
67
69
  interface TablePropertiesLike {
68
70
  propertiesXml?: string;
71
+ unknownPropertyChildren?: UnknownPropertyChild[];
69
72
  styleId?: string;
70
73
  width?: TableWidthLike;
71
74
  alignment?: string;
@@ -83,6 +86,7 @@ interface TablePropertiesLike {
83
86
 
84
87
  interface TableRowPropertiesLike {
85
88
  propertiesXml?: string;
89
+ unknownPropertyChildren?: UnknownPropertyChild[];
86
90
  gridBefore?: number;
87
91
  widthBefore?: TableWidthLike;
88
92
  gridAfter?: number;
@@ -97,6 +101,7 @@ interface TableRowPropertiesLike {
97
101
 
98
102
  interface TableCellPropertiesLike {
99
103
  propertiesXml?: string;
104
+ unknownPropertyChildren?: UnknownPropertyChild[];
100
105
  width?: TableWidthLike;
101
106
  gridSpan?: number;
102
107
  verticalMerge?: "restart" | "continue";
@@ -166,6 +171,7 @@ export function serializeTablePropertiesXml(table: TablePropertiesLike): string
166
171
  return mergePropertiesXml(
167
172
  "w:tblPr",
168
173
  table.propertiesXml,
174
+ table.unknownPropertyChildren,
169
175
  buildTablePropertiesInnerXml(table),
170
176
  TABLE_PROPERTY_STRIP_SPEC,
171
177
  );
@@ -175,6 +181,7 @@ export function serializeTableRowPropertiesXml(row: TableRowPropertiesLike): str
175
181
  return mergePropertiesXml(
176
182
  "w:trPr",
177
183
  row.propertiesXml,
184
+ row.unknownPropertyChildren,
178
185
  buildTableRowPropertiesInnerXml(row),
179
186
  ROW_PROPERTY_STRIP_SPEC,
180
187
  );
@@ -184,21 +191,29 @@ export function serializeTableCellPropertiesXml(cell: TableCellPropertiesLike):
184
191
  return mergePropertiesXml(
185
192
  "w:tcPr",
186
193
  cell.propertiesXml,
194
+ cell.unknownPropertyChildren,
187
195
  buildTableCellPropertiesInnerXml(cell),
188
196
  CELL_PROPERTY_STRIP_SPEC,
189
197
  );
190
198
  }
191
199
 
200
+ // Phase 7 Slice A — `unknownPropertyChildren` (typed grab-bag) is the
201
+ // preferred path. The legacy `existingXml` regex-strip fallback only fires
202
+ // for snapshots that pre-date the typed retrofit (no `unknownPropertyChildren`
203
+ // captured). Once persisted snapshots are migrated and tests no longer
204
+ // reference `propertiesXml`, the fallback branch and `stripKnownProperties`
205
+ // helper can be retired entirely.
192
206
  function mergePropertiesXml(
193
207
  tagName: "w:tblPr" | "w:trPr" | "w:tcPr",
194
208
  existingXml: string | undefined,
209
+ unknownPropertyChildren: readonly UnknownPropertyChild[] | undefined,
195
210
  supportedInnerXml: string,
196
211
  stripSpec: PropertyStripSpec,
197
212
  ): string {
198
- const preservedInnerXml = stripKnownProperties(
199
- extractWrappedChildren(tagName, existingXml),
200
- stripSpec,
201
- );
213
+ const preservedInnerXml =
214
+ unknownPropertyChildren !== undefined
215
+ ? emitPropertyGrabBag(unknownPropertyChildren)
216
+ : stripKnownProperties(extractWrappedChildren(tagName, existingXml), stripSpec);
202
217
  const mergedInnerXml = [supportedInnerXml, preservedInnerXml]
203
218
  .filter((part) => part.length > 0)
204
219
  .join("");
@@ -287,6 +287,7 @@ function normalizeTable(
287
287
  type: "table",
288
288
  ...(table.styleId ? { styleId: table.styleId } : {}),
289
289
  ...(table.propertiesXml ? { propertiesXml: table.propertiesXml } : {}),
290
+ ...(table.unknownPropertyChildren ? { unknownPropertyChildren: table.unknownPropertyChildren } : {}),
290
291
  gridColumns: table.gridColumns,
291
292
  rows,
292
293
  ...(table.width ? { width: table.width } : {}),
@@ -313,6 +314,7 @@ function normalizeTableRow(
313
314
  return {
314
315
  type: "table_row",
315
316
  ...(row.propertiesXml ? { propertiesXml: row.propertiesXml } : {}),
317
+ ...(row.unknownPropertyChildren ? { unknownPropertyChildren: row.unknownPropertyChildren } : {}),
316
318
  ...(row.gridBefore !== undefined ? { gridBefore: row.gridBefore } : {}),
317
319
  ...(row.widthBefore ? { widthBefore: row.widthBefore } : {}),
318
320
  ...(row.gridAfter !== undefined ? { gridAfter: row.gridAfter } : {}),
@@ -343,6 +345,7 @@ function normalizeTableCell(
343
345
  return {
344
346
  type: "table_cell",
345
347
  ...(cell.propertiesXml ? { propertiesXml: cell.propertiesXml } : {}),
348
+ ...(cell.unknownPropertyChildren ? { unknownPropertyChildren: cell.unknownPropertyChildren } : {}),
346
349
  ...(cell.gridSpan ? { gridSpan: cell.gridSpan } : {}),
347
350
  ...(cell.verticalMerge ? { verticalMerge: cell.verticalMerge } : {}),
348
351
  ...(cell.width ? { width: cell.width } : {}),
@@ -454,6 +457,10 @@ function normalizeInlineChildren(
454
457
  normalized.push(normalizeImageNode(node, state));
455
458
  state.cursor += 1;
456
459
  break;
460
+ case "drawing_frame":
461
+ normalized.push(normalizeDrawingFrameNode(node, state));
462
+ state.cursor += 1;
463
+ break;
457
464
  case "hyperlink":
458
465
  normalized.push(normalizeHyperlink(node));
459
466
  state.cursor += measureHyperlink(node);
@@ -617,6 +624,32 @@ function normalizeImageNode(
617
624
  };
618
625
  }
619
626
 
627
/**
 * Normalizes a parsed `drawing_frame` inline node.
 *
 * For picture content that carries a `mediaId`, the media catalog entry in
 * `state.media.items` is (re)written with the filename, package part name,
 * relationship id, optional alt text and EMU extent taken from the node. Only
 * the previous entry's `contentType` is carried over. The node itself is
 * returned unchanged.
 */
function normalizeDrawingFrameNode(
  node: Extract<ParsedInlineNode, { type: "drawing_frame" }>,
  state: NormalizationState,
): InlineNode {
  const { content, anchor } = node;
  if (content.type !== "picture" || !content.mediaId) {
    return node;
  }

  const { mediaId } = content;
  const previous = state.media.items[mediaId];

  // Fall back to a part name derived from the media id when the parser did
  // not record one (the id is expected to look like `media:<path>`).
  let packagePartName = content.packagePartName;
  if (typeof packagePartName !== "string" || packagePartName.length === 0) {
    packagePartName = `/${mediaId.slice("media:".length)}`;
  }

  const basename = packagePartName.slice(packagePartName.lastIndexOf("/") + 1);
  const altText = anchor.docPr?.descr;

  state.media.items[mediaId] = {
    mediaId,
    contentType: previous?.contentType ?? "application/octet-stream",
    filename: basename || "image.bin",
    packagePartName,
    relationshipId: content.blipRef,
    ...(altText ? { altText } : {}),
    widthEmu: anchor.extent.widthEmu,
    heightEmu: anchor.extent.heightEmu,
  };

  return node;
}
652
+
620
653
  /**
621
654
  * Register a chart/SmartArt preview bitmap in the media catalog so the
622
655
  * surface renderer can resolve `previewMediaId` → `previewSrc` the same
@@ -0,0 +1,182 @@
1
+ import type { AnchorGeometry } from "../../model/canonical-document.ts";
2
+
3
/** Element node of the lightweight XML tree the anchor reader walks. */
interface XmlElementNode {
  type: "element";
  /** Tag name as written in the source; may carry a namespace prefix. */
  name: string;
  /** Attribute values keyed by the attribute name as written. */
  attributes: Record<string, string>;
  children: XmlNode[];
}

/** Character-data node of the lightweight XML tree. */
interface XmlTextNode {
  type: "text";
  text: string;
}

/** Any node that can appear in an element's `children` list. */
type XmlNode = XmlElementNode | XmlTextNode;
16
+
17
/**
 * Builds the `AnchorGeometry` for a drawing container element.
 *
 * A container whose local name is `anchor` is treated as floating; anything
 * else is treated as inline. Extent and wrap mode are always present; the
 * remaining fields are only set when the corresponding element or attribute
 * exists. The anchor-only attributes (`relativeHeight`, `behindDoc`,
 * `layoutInCell`, `allowOverlap`, `simplePos`) are read for floating
 * containers only.
 *
 * @param container The anchor/inline element to read.
 * @returns The populated geometry record.
 */
export function parseAnchorGeometry(container: XmlElementNode): AnchorGeometry {
  const isFloating = localName(container.name) === "anchor";

  const geometry: AnchorGeometry = {
    display: isFloating ? "floating" : "inline",
    extent: readExtent(container),
    wrapMode: readWrapMode(container),
  };

  const horizontal = findFirstChild(container, "positionH");
  if (horizontal) geometry.positionH = readAxisPosition(horizontal);

  const vertical = findFirstChild(container, "positionV");
  if (vertical) geometry.positionV = readAxisPosition(vertical);

  // NOTE(review): `distMargins` is filled from the `effectExtent` child
  // (t/b/l/r). The container's distT/distB/distL/distR attributes are not
  // read anywhere in this function — confirm this mapping is intended.
  const effectExtent = findFirstChild(container, "effectExtent");
  if (effectExtent) {
    geometry.distMargins = {
      top: readIntAttr(effectExtent, "t") ?? 0,
      bottom: readIntAttr(effectExtent, "b") ?? 0,
      left: readIntAttr(effectExtent, "l") ?? 0,
      right: readIntAttr(effectExtent, "r") ?? 0,
    };
  }

  if (isFloating) {
    const relativeHeight = readIntAttr(container, "relativeHeight");
    if (relativeHeight !== undefined) geometry.relativeHeight = relativeHeight;

    const behindDoc = readBoolAttr(container, "behindDoc");
    if (behindDoc !== undefined) geometry.behindDoc = behindDoc;

    const layoutInCell = readBoolAttr(container, "layoutInCell");
    if (layoutInCell !== undefined) geometry.layoutInCell = layoutInCell;

    const allowOverlap = readBoolAttr(container, "allowOverlap");
    if (allowOverlap !== undefined) geometry.allowOverlap = allowOverlap;

    const simplePos = readBoolAttr(container, "simplePos");
    if (simplePos !== undefined) geometry.simplePos = simplePos;
  }

  const docPrEl = findFirstChild(container, "docPr");
  if (docPrEl) {
    const { id, name, descr } = docPrEl.attributes;
    geometry.docPr = {
      id: id ?? "",
      ...(name ? { name } : {}),
      ...(descr ? { descr } : {}),
    };
  }

  return geometry;
}
77
+
78
/**
 * Reads the drawing size in EMU.
 *
 * Prefers a direct `extent` child of the container. When there is none, the
 * first `ext` found under the first `xfrm` descendant is used instead.
 * Missing or unparsable dimensions are reported as 0.
 */
function readExtent(container: XmlElementNode): AnchorGeometry["extent"] {
  let sizeEl = findFirstChild(container, "extent");
  if (!sizeEl) {
    const transform = findFirstDescendant(container, "xfrm");
    sizeEl = transform && findFirstDescendant(transform, "ext");
  }

  if (!sizeEl) {
    return { widthEmu: 0, heightEmu: 0 };
  }
  return {
    widthEmu: readIntAttr(sizeEl, "cx") ?? 0,
    heightEmu: readIntAttr(sizeEl, "cy") ?? 0,
  };
}
94
+
95
/**
 * Maps the first recognised `wrap*` child of the container to a wrap mode.
 * Returns `"none"` when no such child is present.
 */
function readWrapMode(
  container: XmlElementNode,
): AnchorGeometry["wrapMode"] {
  for (const child of container.children) {
    if (child.type === "element") {
      switch (localName(child.name)) {
        case "wrapNone":
          return "none";
        case "wrapSquare":
          return "square";
        case "wrapTight":
          return "tight";
        case "wrapThrough":
          return "through";
        case "wrapTopAndBottom":
          return "topAndBottom";
        default:
          break;
      }
    }
  }
  return "none";
}
109
+
110
/**
 * Reads one positioning axis element (`positionH` / `positionV`).
 *
 * `relativeFrom` comes from the attribute of that name (plain or `wp:`
 * prefixed) and defaults to the empty string. `align` is set only for
 * non-empty text, `offset` only when the `posOffset` text parses as an
 * integer.
 */
function readAxisPosition(
  el: XmlElementNode,
): { relativeFrom: string; align?: string; offset?: number } {
  const position: { relativeFrom: string; align?: string; offset?: number } = {
    relativeFrom: el.attributes.relativeFrom ?? el.attributes["wp:relativeFrom"] ?? "",
  };

  const alignEl = findFirstChild(el, "align");
  const alignment = alignEl ? extractText(alignEl).trim() : "";
  if (alignment) position.align = alignment;

  const offsetEl = findFirstChild(el, "posOffset");
  if (offsetEl) {
    const offset = parseInt(extractText(offsetEl).trim(), 10);
    if (Number.isFinite(offset)) position.offset = offset;
  }

  return position;
}
131
+
132
/**
 * Returns the first direct child element whose namespace-stripped name equals
 * `local`, or `undefined` when there is none. Text children are ignored.
 */
function findFirstChild(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  return node.children.find(
    (child): child is XmlElementNode =>
      child.type === "element" && localName(child.name) === local,
  );
}
141
+
142
/**
 * Depth-first, document-order search for the first descendant element whose
 * namespace-stripped name equals `local`. The starting node itself is never
 * a candidate.
 */
function findFirstDescendant(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  // Explicit stack; children are pushed in reverse so they pop in document order.
  const pending: XmlNode[] = [...node.children].reverse();
  while (pending.length > 0) {
    const current = pending.pop();
    if (current === undefined || current.type !== "element") continue;
    if (localName(current.name) === local) return current;
    for (let i = current.children.length - 1; i >= 0; i--) {
      const child = current.children[i];
      if (child !== undefined) pending.push(child);
    }
  }
  return undefined;
}
154
+
155
/**
 * Strips the namespace prefix from a qualified name: everything up to and
 * including the first `:` is dropped. Names without a colon are returned
 * unchanged.
 */
function localName(name: string): string {
  // indexOf yields -1 when there is no colon, so the slice starts at 0.
  return name.slice(name.indexOf(":") + 1);
}
159
+
160
/**
 * Concatenates all character data beneath `node`, in document order,
 * descending through nested elements.
 */
function extractText(node: XmlElementNode): string {
  let text = "";
  for (const child of node.children) {
    text += child.type === "text" ? child.text : extractText(child);
  }
  return text;
}
165
+
166
/**
 * Reads an integer attribute, accepting either the bare name or the
 * `wp:`-prefixed form.
 *
 * @returns The base-10 integer, or `undefined` when the attribute is absent
 *          or does not parse to a finite number.
 */
function readIntAttr(node: XmlElementNode, name: string): number | undefined {
  const raw = node.attributes[name] ?? node.attributes[`wp:${name}`];
  if (raw === undefined) {
    return undefined;
  }
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
172
+
173
/**
 * Reads a boolean attribute, accepting the bare name or the `wp:` / `w:`
 * prefixed forms (checked in that order).
 *
 * The value is trimmed and compared case-insensitively. `0`, `false` and
 * `off` are false; every other value — including the empty string — is
 * true. `off` is handled because the `w:`-prefixed lookup can hit on/off
 * style attributes, where the original comparison reported `off` as true.
 *
 * @returns The decoded flag, or `undefined` when the attribute is absent.
 */
function readBoolAttr(node: XmlElementNode, name: string): boolean | undefined {
  const raw =
    node.attributes[name] ??
    node.attributes[`wp:${name}`] ??
    node.attributes[`w:${name}`];
  if (raw === undefined) return undefined;

  const value = raw.trim().toLowerCase();
  return value !== "0" && value !== "false" && value !== "off";
}
181
+
182
+ export { type XmlElementNode as AnchorXmlElement };
@@ -0,0 +1,319 @@
1
+ import type { OpcRelationship } from "./part-manifest.ts";
2
+ import { normalizePartPath, resolveRelationshipTarget } from "./part-manifest.ts";
3
+ import type { InlineMediaPart } from "./parse-inline-media.ts";
4
+ import type { ChartPartLookup } from "./parse-complex-content.ts";
5
+ import type { DrawingFrameNode, AnchorGeometry } from "../../model/canonical-document.ts";
6
+ import { parseAnchorGeometry } from "./parse-anchor.ts";
7
+ import { parsePicture, type PictureXmlElement } from "./parse-picture.ts";
8
+ import { parseShapeContent, type TxbxBlockParser } from "./parse-shapes.ts";
9
+
10
// `graphicData` URI values this parser recognises. Each one is compared
// verbatim against the `uri` attribute; see `isKnownUri`.
const PICTURE_GRAPHIC_URI = "http://schemas.openxmlformats.org/drawingml/2006/picture";

const CHART_GRAPHIC_URI = "http://schemas.openxmlformats.org/drawingml/2006/chart";
const CHART_GRAPHIC_URI_ALT = "http://schemas.microsoft.com/office/drawing/2007/8/2/chart";

const SMARTART_GRAPHIC_URI = "http://schemas.microsoft.com/office/drawing/2007/8/2/diagram";
const SMARTART_GRAPHIC_URI_ALT = "http://schemas.openxmlformats.org/drawingml/2006/diagram";

const WPS_SHAPE_GRAPHIC_URI = "http://schemas.microsoft.com/office/word/2010/wordprocessingShape";
22
+
23
/** Options accepted by `parseDrawingFrame`. */
export interface ParseDrawingOpts {
  /** Relationships of the source part; searched by id to resolve a picture's blip reference. */
  relationships: readonly OpcRelationship[];
  /** Media parts keyed by string. NOTE(review): not read by the code visible in this file — confirm usage. */
  mediaParts?: ReadonlyMap<string, InlineMediaPart>;
  /** Part that owns the drawing; relationship targets resolve against `/word/document.xml` when omitted. */
  sourcePartPath?: string;
  /** Chart part lookup. NOTE(review): not read by the code visible in this file — confirm usage. */
  chartPartLookup?: ChartPartLookup;
  /**
   * CO4 F3.3 — optional recursive parser for `w:txbxContent`. When supplied,
   * parseDrawingFrame forwards it to parseShapeContent so shape text-boxes
   * get fully-parsed block structure on `ShapeContent.txbxBlocks`. Without it
   * only `txbxContentXml` raw preservation is populated.
   */
  blockParser?: TxbxBlockParser;
}
36
+
37
/**
 * Parses a drawing XML fragment into a `DrawingFrameNode`.
 *
 * Returns `null` — leaving the fragment to whatever parser the caller tries
 * next — when no anchor/inline container is found, when the `graphicData`
 * URI is not one this module recognises, or when a WPS shape looks like
 * WordArt.
 *
 * @param drawingXml Raw XML of the drawing.
 * @param opts       Relationship and part context; see `ParseDrawingOpts`.
 */
export function parseDrawingFrame(
  drawingXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode | null {
  // F3.1: work inside the chosen mc:AlternateContent branch (Choice first,
  // Fallback second) when the drawing is wrapped in one.
  const scope = pickAlternateContentBranch(parseXml(drawingXml));

  // A floating anchor takes precedence over an inline container.
  const container =
    findFirstDescendant(scope, "anchor") ?? findFirstDescendant(scope, "inline");
  if (container === undefined) return null;

  // The graphicData URI decides which content parser applies.
  const graphicData = findFirstDescendant(scope, "graphicData");
  const uri = graphicData?.attributes.uri ?? "";

  // F3.5: unknown URIs are not handled here; returning null lets the legacy
  // parse chain deal with them.
  if (!isKnownUri(uri)) return null;

  // WordArt stays on the dedicated legacy `wordart` node path; only other
  // WPS drawings become DrawingFrame shape content.
  if (uri === WPS_SHAPE_GRAPHIC_URI && isWordArtGraphicData(graphicData)) return null;

  // NOTE(review): the `as never` cast bypasses type checking of the argument;
  // the element shape declared in this file mirrors the one in parse-anchor.ts.
  const geometry: AnchorGeometry = parseAnchorGeometry(container as never);

  return {
    type: "drawing_frame",
    anchor: geometry,
    content: resolveContent(uri, graphicData, drawingXml, opts),
  };
}
73
+
74
/** True when `uri` is one of the graphicData URIs this module can parse. */
function isKnownUri(uri: string): boolean {
  const recognised: readonly string[] = [
    PICTURE_GRAPHIC_URI,
    CHART_GRAPHIC_URI,
    CHART_GRAPHIC_URI_ALT,
    SMARTART_GRAPHIC_URI,
    SMARTART_GRAPHIC_URI_ALT,
    WPS_SHAPE_GRAPHIC_URI,
  ];
  return recognised.includes(uri);
}
84
+
85
/**
 * Reports whether the shape inside `graphicData` uses a preset geometry whose
 * name starts with "text" (case-insensitive), which this module treats as
 * WordArt. Missing elements at any level yield `false`.
 */
function isWordArtGraphicData(graphicData: XmlElementNode | undefined): boolean {
  if (graphicData === undefined) return false;

  const shape = findFirstDescendant(graphicData, "wsp");
  const shapeProps = shape && findFirstChild(shape, "spPr");
  const presetGeometry = shapeProps && findFirstChild(shapeProps, "prstGeom");

  return /^text/i.test(presetGeometry?.attributes.prst ?? "");
}
93
+
94
/** Local names that decide whether an AlternateContent wraps the drawing. */
const ALTERNATE_CONTENT_PROBE_NAMES: ReadonlySet<string> = new Set([
  "AlternateContent",
  "anchor",
  "inline",
]);

/** First descendant (document order) whose local name is in `locals`. */
function findFirstDescendantAmong(
  node: XmlElementNode,
  locals: ReadonlySet<string>,
): XmlElementNode | undefined {
  for (const child of node.children) {
    if (child.type !== "element") continue;
    if (locals.has(localName(child.name))) return child;
    const found = findFirstDescendantAmong(child, locals);
    if (found) return found;
  }
  return undefined;
}

/**
 * If the drawing is wrapped in mc:AlternateContent, return the chosen branch
 * subtree. Prefer Choice (richer content); use Fallback only when Choice
 * contains no graphicData we can parse.
 *
 * Only an AlternateContent that appears before the drawing's own
 * anchor/inline container is unwrapped. One nested inside the container
 * (for example in text-box content) belongs to the drawing's content;
 * descending into it would swap the outer frame for a nested drawing, or
 * leave the caller with no container at all. In that case `root` is
 * returned untouched.
 *
 * @param root Parsed tree of the drawing fragment.
 * @returns The subtree the caller should search for anchor and graphicData.
 */
function pickAlternateContentBranch(root: XmlElementNode): XmlElementNode {
  const first = findFirstDescendantAmong(root, ALTERNATE_CONTENT_PROBE_NAMES);
  if (!first || localName(first.name) !== "AlternateContent") return root;
  const alt = first;

  const choice = findFirstChild(alt, "Choice");
  const fallback = findFirstChild(alt, "Fallback");

  if (choice) {
    const choiceGraphicData = findFirstDescendant(choice, "graphicData");
    const choiceUri = choiceGraphicData?.attributes.uri ?? "";
    if (isKnownUri(choiceUri)) return choice;
  }

  if (fallback) {
    const fallbackGraphicData = findFirstDescendant(fallback, "graphicData");
    const fallbackUri = fallbackGraphicData?.attributes.uri ?? "";
    if (isKnownUri(fallbackUri)) return fallback;
  }

  // Neither branch has a known URI — return Choice (if any) so caller can emit
  // opaque preservation; otherwise the original root.
  return choice ?? fallback ?? root;
}
122
+
123
/**
 * Builds the `content` payload of a drawing frame for a recognised
 * graphicData URI.
 *
 * - Picture: parsed via `parsePicture`; when the blip's relationship is an
 *   image relationship, `packagePartName` and `mediaId` are filled in. The
 *   raw drawing XML is always attached (F4.1, lossless round-trip).
 * - Chart / SmartArt: a preview placeholder carrying the raw XML.
 * - WPS shape: parsed via `parseShapeContent`.
 *
 * Anything that cannot be parsed is returned as opaque raw XML.
 */
function resolveContent(
  uri: string,
  graphicData: XmlElementNode | undefined,
  rawXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode["content"] {
  switch (uri) {
    case PICTURE_GRAPHIC_URI: {
      const pic = graphicData ? parsePicture(graphicData as PictureXmlElement) : undefined;
      if (!pic) return { type: "opaque", rawXml };

      // Look the blip reference up by relationship id; a later duplicate id
      // overrides an earlier one.
      let rel: OpcRelationship | undefined;
      for (const candidate of opts.relationships) {
        if (candidate.id === pic.blipRef) rel = candidate;
      }

      if (rel?.type.endsWith("/image")) {
        const partPath = normalizePartPath(
          resolveRelationshipTarget(opts.sourcePartPath ?? "/word/document.xml", rel),
        );
        pic.packagePartName = partPath;
        pic.mediaId = `media:${partPath.slice(1)}`;
      }

      // F4.1 — keep the outer drawing XML for lossless serialization.
      pic.rawXml = rawXml;
      return pic;
    }

    case CHART_GRAPHIC_URI:
    case CHART_GRAPHIC_URI_ALT:
      return { type: "chart_preview", rawXml };

    case SMARTART_GRAPHIC_URI:
    case SMARTART_GRAPHIC_URI_ALT:
      return { type: "smartart_preview", rawXml };

    case WPS_SHAPE_GRAPHIC_URI: {
      const shape = graphicData
        ? parseShapeContent(graphicData as PictureXmlElement, rawXml, opts.blockParser)
        : undefined;
      if (shape) return shape;
      return { type: "opaque", rawXml };
    }

    default:
      return { type: "opaque", rawXml };
  }
}
170
+
171
+ // ── Minimal self-contained XML parser ──────────────────────────────────────
172
+
173
/** Element node produced by the local `parseXml`. */
interface XmlElementNode {
  type: "element";
  /** Tag name exactly as written, including any namespace prefix. */
  name: string;
  /** Attribute values (entity-decoded) keyed by the attribute name as written. */
  attributes: Record<string, string>;
  children: XmlNode[];
}

/** Character-data node produced by the local `parseXml` (entity-decoded). */
interface XmlTextNode {
  type: "text";
  text: string;
}

/** Any node that can appear in an element's `children` list. */
type XmlNode = XmlElementNode | XmlTextNode;
186
+
187
/**
 * Returns the first direct child element whose namespace-stripped name equals
 * `local`, or `undefined` when there is none. Text children are ignored.
 */
function findFirstChild(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  return node.children.find(
    (child): child is XmlElementNode =>
      child.type === "element" && localName(child.name) === local,
  );
}
196
+
197
/**
 * Depth-first, document-order search for the first descendant element whose
 * namespace-stripped name equals `local`. The starting node itself is never
 * a candidate.
 */
function findFirstDescendant(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  // Explicit stack; children are pushed in reverse so they pop in document order.
  const pending: XmlNode[] = [...node.children].reverse();
  while (pending.length > 0) {
    const current = pending.pop();
    if (current === undefined || current.type !== "element") continue;
    if (localName(current.name) === local) return current;
    for (let i = current.children.length - 1; i >= 0; i--) {
      const child = current.children[i];
      if (child !== undefined) pending.push(child);
    }
  }
  return undefined;
}
209
+
210
/**
 * Strips the namespace prefix from a qualified name: everything up to and
 * including the first `:` is dropped. Names without a colon are returned
 * unchanged.
 */
function localName(name: string): string {
  // indexOf yields -1 when there is no colon, so the slice starts at 0.
  return name.slice(name.indexOf(":") + 1);
}
214
+
215
/**
 * Parses an XML fragment into a lightweight element/text tree.
 *
 * The result is a synthetic `__root__` element whose children are the
 * top-level nodes of the input. Comments and processing instructions are
 * skipped; text and attribute values are entity-decoded.
 *
 * The parser is tolerant of malformed input:
 * - an unterminated closing tag (`</a` with no `>`) ends parsing instead of
 *   rewinding the cursor, which previously looped forever;
 * - a stray closing tag never pops the synthetic root, so nodes that follow
 *   it are still attached instead of being silently dropped.
 *
 * @param xml Raw XML text.
 * @returns The synthetic root element.
 * @throws Error when `parseTag` rejects an attribute (unquoted value).
 */
function parseXml(xml: string): XmlElementNode {
  const root: XmlElementNode = {
    type: "element",
    name: "__root__",
    attributes: {},
    children: [],
  };
  const stack: XmlElementNode[] = [root];
  // The root is never popped, so the stack always has a top.
  const top = (): XmlElementNode => stack[stack.length - 1] ?? root;
  let cursor = 0;

  while (cursor < xml.length) {
    // Comment.
    if (xml.startsWith("<!--", cursor)) {
      const end = xml.indexOf("-->", cursor);
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }
    // Processing instruction / XML declaration.
    if (xml.startsWith("<?", cursor)) {
      const end = xml.indexOf("?>", cursor);
      cursor = end >= 0 ? end + 2 : xml.length;
      continue;
    }
    // Character data up to the next tag.
    if (xml[cursor] !== "<") {
      const nextTag = xml.indexOf("<", cursor);
      const end = nextTag >= 0 ? nextTag : xml.length;
      const text = decodeEntities(xml.slice(cursor, end));
      if (text) top().children.push({ type: "text", text });
      cursor = end;
      continue;
    }
    // Closing tag.
    if (xml[cursor + 1] === "/") {
      const end = xml.indexOf(">", cursor);
      if (stack.length > 1) stack.pop();
      cursor = end >= 0 ? end + 1 : xml.length;
      continue;
    }
    // Opening or self-closing tag.
    const tagEnd = findTagEnd(xml, cursor);
    const tagBody = xml.slice(cursor + 1, tagEnd);
    const selfClosing = /\/\s*$/.test(tagBody);
    const { name, attributes } = parseTag(tagBody.replace(/\/\s*$/, "").trim());
    const el: XmlElementNode = { type: "element", name, attributes, children: [] };
    top().children.push(el);
    if (!selfClosing) stack.push(el);
    cursor = tagEnd + 1;
  }

  return root;
}
262
+
263
/**
 * Finds the index of the `>` that closes the tag opening at `start`,
 * ignoring any `>` inside single- or double-quoted attribute values.
 *
 * @returns The index of the closing `>`, or `xml.length - 1` when the tag is
 *          never closed.
 */
function findTagEnd(xml: string, start: number): number {
  let openQuote = "";
  for (let i = start + 1; i < xml.length; i++) {
    const ch = xml[i];
    if (openQuote !== "") {
      // Inside a quoted value: only the matching quote is significant.
      if (ch === openQuote) openQuote = "";
      continue;
    }
    if (ch === `"` || ch === `'`) {
      openQuote = ch;
    } else if (ch === ">") {
      return i;
    }
  }
  return xml.length - 1;
}
279
+
280
/**
 * Splits the inside of a start tag (without `<`, `>` or a trailing `/`) into
 * the tag name and its attributes.
 *
 * Attribute values may use single or double quotes and are entity-decoded.
 * An attribute without `=` is stored with an empty-string value.
 *
 * @throws Error `Bad attr <key>` when `=` is not followed by a quote.
 */
function parseTag(body: string): { name: string; attributes: Record<string, string> } {
  let pos = 0;

  // Advances `pos` while `keep` accepts the current character; returns the consumed text.
  const take = (keep: (ch: string) => boolean): string => {
    const from = pos;
    while (pos < body.length && keep(body[pos] ?? "")) pos++;
    return body.slice(from, pos);
  };
  const skipSpaces = (): void => {
    take((ch) => /\s/.test(ch));
  };

  skipSpaces();
  const name = take((ch) => !/\s/.test(ch));

  const attributes: Record<string, string> = {};
  for (;;) {
    skipSpaces();
    if (pos >= body.length) break;

    const key = take((ch) => !/[\s=]/.test(ch));
    skipSpaces();
    if (body[pos] !== "=") {
      // Valueless attribute.
      attributes[key] = "";
      continue;
    }
    pos++; // consume "="
    skipSpaces();

    const quote = body[pos];
    if (quote !== `"` && quote !== `'`) throw new Error(`Bad attr ${key}`);
    pos++; // consume opening quote

    attributes[key] = decodeEntities(take((ch) => ch !== quote));
    pos++; // consume closing quote
  }

  return { name, attributes };
}
307
+
308
/**
 * Replaces the five predefined XML entities and numeric character references
 * (`&#N;`, `&#xH;`) with the characters they denote.
 *
 * Numeric references outside the Unicode range (above U+10FFFF) are left in
 * the text verbatim: `String.fromCodePoint` throws a `RangeError` for such
 * values, which previously aborted the whole parse. Unrecognised named
 * entities are never matched and therefore also stay untouched.
 *
 * @param s Text that may contain entity references.
 * @returns The decoded text.
 */
function decodeEntities(s: string): string {
  return s.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (match: string, entity: string): string => {
      switch (entity) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return `"`;
        case "apos":
          return "'";
        default:
          break;
      }

      const codePoint = entity.startsWith("#x")
        ? parseInt(entity.slice(2), 16)
        : parseInt(entity.slice(1), 10);
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}