@beyondwork/docx-react-component 1.0.58 → 1.0.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/README.md +2 -2
  2. package/package.json +2 -1
  3. package/src/api/awareness-identity-types.ts +4 -2
  4. package/src/api/comment-negotiation-types.ts +4 -1
  5. package/src/api/external-custody-types.ts +16 -0
  6. package/src/api/internal/build-ref-projections.ts +108 -0
  7. package/src/api/package-version.ts +1 -1
  8. package/src/api/participants-types.ts +11 -1
  9. package/src/api/public-types.ts +978 -10
  10. package/src/api/scope-metadata-resolver-types.ts +6 -0
  11. package/src/compare/diff-engine.ts +3 -0
  12. package/src/core/commands/formatting-commands.ts +1 -0
  13. package/src/core/commands/index.ts +225 -16
  14. package/src/core/commands/legacy-form-field-commands.ts +181 -0
  15. package/src/core/commands/table-structure-commands.ts +149 -31
  16. package/src/core/selection/mapping.ts +20 -0
  17. package/src/core/state/editor-state.ts +2 -1
  18. package/src/index.ts +28 -0
  19. package/src/io/docx-session.ts +22 -3
  20. package/src/io/export/export-session.ts +11 -7
  21. package/src/io/export/ooxml-namespaces.ts +47 -0
  22. package/src/io/export/reattach-preserved-parts.ts +4 -16
  23. package/src/io/export/serialize-comments.ts +3 -131
  24. package/src/io/export/serialize-ffdata.ts +89 -0
  25. package/src/io/export/serialize-headers-footers.ts +5 -0
  26. package/src/io/export/serialize-main-document.ts +224 -34
  27. package/src/io/export/serialize-numbering.ts +22 -2
  28. package/src/io/export/serialize-revisions.ts +99 -0
  29. package/src/io/export/serialize-tables.ts +9 -0
  30. package/src/io/export/split-review-boundaries.ts +1 -0
  31. package/src/io/export/table-properties-xml.ts +14 -0
  32. package/src/io/load-scheduler.ts +70 -28
  33. package/src/io/normalize/normalize-text.ts +13 -0
  34. package/src/io/ooxml/_mini-xml.ts +198 -0
  35. package/src/io/ooxml/canonicalize-payload.ts +1 -4
  36. package/src/io/ooxml/chart/chart-style-table.ts +4 -3
  37. package/src/io/ooxml/chart/parse-chart-space.ts +2 -4
  38. package/src/io/ooxml/chart/parse-series.ts +2 -1
  39. package/src/io/ooxml/chart/resolve-color.ts +2 -2
  40. package/src/io/ooxml/chart/types.ts +6 -434
  41. package/src/io/ooxml/comment-presentation-payload.ts +6 -5
  42. package/src/io/ooxml/highlight-colors.ts +8 -5
  43. package/src/io/ooxml/parse-anchor.ts +68 -53
  44. package/src/io/ooxml/parse-comments.ts +14 -142
  45. package/src/io/ooxml/parse-complex-content.ts +3 -106
  46. package/src/io/ooxml/parse-drawing.ts +100 -195
  47. package/src/io/ooxml/parse-ffdata.ts +93 -0
  48. package/src/io/ooxml/parse-fields.ts +7 -146
  49. package/src/io/ooxml/parse-fill.ts +88 -8
  50. package/src/io/ooxml/parse-font-table.ts +5 -105
  51. package/src/io/ooxml/parse-footnotes.ts +28 -152
  52. package/src/io/ooxml/parse-headers-footers.ts +106 -212
  53. package/src/io/ooxml/parse-inline-media.ts +3 -200
  54. package/src/io/ooxml/parse-main-document.ts +180 -217
  55. package/src/io/ooxml/parse-numbering.ts +154 -335
  56. package/src/io/ooxml/parse-object.ts +147 -0
  57. package/src/io/ooxml/parse-ole-relationship.ts +82 -0
  58. package/src/io/ooxml/parse-paragraph-formatting.ts +7 -10
  59. package/src/io/ooxml/parse-picture-sdt.ts +85 -0
  60. package/src/io/ooxml/parse-picture.ts +72 -42
  61. package/src/io/ooxml/parse-revisions.ts +285 -51
  62. package/src/io/ooxml/parse-settings.ts +6 -99
  63. package/src/io/ooxml/parse-shapes.ts +25 -140
  64. package/src/io/ooxml/parse-styles.ts +3 -218
  65. package/src/io/ooxml/parse-tables.ts +76 -256
  66. package/src/io/ooxml/parse-theme.ts +1 -4
  67. package/src/io/ooxml/property-grab-bag.ts +5 -47
  68. package/src/io/ooxml/xml-element-serialize.ts +32 -0
  69. package/src/io/ooxml/xml-parser.ts +183 -0
  70. package/src/legal/bookmarks.ts +1 -1
  71. package/src/legal/cross-references.ts +1 -1
  72. package/src/legal/defined-terms.ts +1 -1
  73. package/src/legal/{_document-root.ts → document-root.ts} +8 -0
  74. package/src/legal/signature-blocks.ts +1 -1
  75. package/src/model/canonical-document.ts +159 -6
  76. package/src/model/chart-types.ts +439 -0
  77. package/src/model/snapshot.ts +3 -1
  78. package/src/review/store/comment-remapping.ts +24 -11
  79. package/src/review/store/revision-actions.ts +482 -2
  80. package/src/review/store/revision-store.ts +15 -0
  81. package/src/review/store/revision-types.ts +76 -0
  82. package/src/runtime/collab/remote-cursor-awareness.ts +24 -0
  83. package/src/runtime/collab/runtime-collab-sync.ts +33 -0
  84. package/src/runtime/diagnostics/build-diagnostic.ts +151 -0
  85. package/src/runtime/diagnostics/code-metadata-table.ts +221 -0
  86. package/src/runtime/document-runtime.ts +476 -34
  87. package/src/runtime/document-search.ts +115 -0
  88. package/src/runtime/edit-ops/index.ts +18 -2
  89. package/src/runtime/footnote-resolver.ts +130 -0
  90. package/src/runtime/layout/layout-engine-instance.ts +31 -4
  91. package/src/runtime/layout/layout-engine-version.ts +37 -1
  92. package/src/runtime/layout/page-graph.ts +14 -1
  93. package/src/runtime/layout/resolved-formatting-state.ts +21 -0
  94. package/src/runtime/numbering-prefix.ts +17 -0
  95. package/src/runtime/query-scopes.ts +5 -8
  96. package/src/runtime/resolved-numbering-geometry.ts +37 -6
  97. package/src/runtime/revision-runtime.ts +27 -1
  98. package/src/runtime/selection/post-edit-validator.ts +60 -6
  99. package/src/runtime/structure-ops/index.ts +20 -4
  100. package/src/runtime/surface-projection.ts +290 -21
  101. package/src/runtime/table-schema.ts +6 -0
  102. package/src/runtime/theme-color-resolver.ts +2 -2
  103. package/src/runtime/units.ts +9 -0
  104. package/src/runtime/workflow-rail-segments.ts +4 -0
  105. package/src/ui/WordReviewEditor.tsx +187 -43
  106. package/src/ui/editor-runtime-boundary.ts +10 -0
  107. package/src/ui/editor-shell-view.tsx +4 -1
  108. package/src/ui/headless/chrome-registry.ts +53 -0
  109. package/src/ui/headless/selection-tool-resolver.ts +11 -1
  110. package/src/ui-tailwind/chrome/chrome-preset-model.ts +13 -0
  111. package/src/ui-tailwind/chrome/tw-command-palette-mount.tsx +96 -0
  112. package/src/ui-tailwind/chrome/tw-context-menu.tsx +2 -1
  113. package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +5 -4
  114. package/src/ui-tailwind/chrome/tw-mode-dock.tsx +6 -2
  115. package/src/ui-tailwind/chrome/use-container-breakpoint.ts +111 -0
  116. package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +0 -9
  117. package/src/ui-tailwind/chrome-overlay/tw-object-selection-overlay.tsx +1 -0
  118. package/src/ui-tailwind/chrome-overlay/tw-page-stack-overlay-layer.tsx +6 -7
  119. package/src/ui-tailwind/editor-surface/pm-schema.ts +87 -25
  120. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +9 -0
  121. package/src/ui-tailwind/editor-surface/shape-renderer.ts +76 -14
  122. package/src/ui-tailwind/editor-surface/tw-page-block-view.helpers.ts +18 -1
  123. package/src/ui-tailwind/editor-surface/tw-page-block-view.tsx +2 -0
  124. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +18 -2
  125. package/src/ui-tailwind/index.ts +9 -0
  126. package/src/ui-tailwind/page-chrome-model.ts +77 -5
  127. package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +56 -1
  128. package/src/ui-tailwind/page-stack/tw-region-block-renderer.tsx +2 -0
  129. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +116 -113
  130. package/src/ui-tailwind/review/tw-review-rail-footer.tsx +2 -2
  131. package/src/ui-tailwind/theme/tokens.ts +14 -0
  132. package/src/ui-tailwind/toolbar/tw-shell-header.tsx +5 -0
  133. package/src/ui-tailwind/tw-review-workspace.tsx +29 -87
  134. package/src/validation/diagnostics.ts +1 -0
@@ -4,8 +4,15 @@ import type { InlineMediaPart } from "./parse-inline-media.ts";
4
4
  import type { ChartPartLookup } from "./parse-complex-content.ts";
5
5
  import type { DrawingFrameNode, AnchorGeometry } from "../../model/canonical-document.ts";
6
6
  import { parseAnchorGeometry } from "./parse-anchor.ts";
7
- import { parsePicture, type PictureXmlElement } from "./parse-picture.ts";
7
+ import { parsePicture } from "./parse-picture.ts";
8
8
  import { parseShapeContent, type TxbxBlockParser } from "./parse-shapes.ts";
9
+ import {
10
+ type XmlElementNode,
11
+ findFirstChild,
12
+ findFirstDescendant,
13
+ localName,
14
+ parseXml,
15
+ } from "./_mini-xml.ts";
9
16
 
10
17
  const PICTURE_GRAPHIC_URI =
11
18
  "http://schemas.openxmlformats.org/drawingml/2006/picture";
@@ -40,34 +47,102 @@ export function parseDrawingFrame(
40
47
  ): DrawingFrameNode | null {
41
48
  const root = parseXml(drawingXml);
42
49
 
43
- // F3.1: unwrap mc:AlternateContent to its Choice branch (preferred) or Fallback.
44
- // Real-world Word output wraps nearly every w:drawing in AlternateContent. The
45
- // branch we pick determines which anchor/graphicData we descend into.
46
- const searchRoot = pickAlternateContentBranch(root);
50
+ // F3.1 / Phase-1.1 B1: mc:AlternateContent handling moved from "pick one branch"
51
+ // to "try branches in order, keep first that yields non-opaque content".
52
+ // This fixes the case where Choice advertises a known graphicData URI but its
53
+ // body is empty / unparseable, and Fallback carries the actual usable content.
54
+ const alt = findFirstDescendant(root, "AlternateContent");
55
+ if (alt) {
56
+ return resolveFromAlternateContent(alt, drawingXml, opts);
57
+ }
58
+ return resolveFromBranch(root, drawingXml, opts);
59
+ }
60
+
61
/**
 * Phase 4.1 G2 — Requires= tokens whose mc:Choice branches we can parse.
 *
 * A Choice's `Requires="t1 t2 …"` attribute is treated as supported only when
 * every listed token is a member of this set; a single unknown token rejects
 * the whole Choice. Grow this list as CO4 / downstream lanes add coverage.
 *
 * NOTE(review): membership is checked against the literal token text from the
 * attribute (see isChoiceSupported) — presumably documents always use these
 * conventional prefixes; confirm if prefix→URI resolution is ever required.
 */
const SUPPORTED_REQUIRES_NAMESPACES = new Set<string>([
  // Empty token: no Requires attribute means the Choice is accepted.
  "",
  // Word chart Choice wrappers still resolve to chart_preview.
  "c14",
  // SmartArt Choice wrappers still resolve to smartart_preview.
  "dgm2009",
  // WordprocessingShape — parsed.
  "wps",
  // WordprocessingGroup — preserved via opaque; Choice-level acceptance is fine.
  "wpg",
  // WordML 2010 extensions.
  "w14",
  // WordML 2012 extensions.
  "w15",
  // WordML 2016 SE.
  "w16se",
  // DrawingML 2010.
  "a14",
  // VML — preserved as opaque.
  "v",
  // VML office extensions — preserved as opaque.
  "o",
  // DrawingML wordproc 2010.
  "wp14",
]);
79
+
80
/**
 * Resolve a drawing wrapped in mc:AlternateContent into a DrawingFrameNode.
 *
 * Branch selection, in priority order:
 *  1. the first supported Choice (document order) whose frame has non-opaque
 *     content;
 *  2. otherwise the Fallback frame, when its content is non-opaque;
 *  3. otherwise the first supported Choice that produced any frame at all
 *     (its content is necessarily opaque at this point);
 *  4. otherwise the Fallback frame, or null when no branch produced a frame.
 *
 * Note that a supported Choice resolving to opaque content (or to no frame)
 * does not end the search — later Choices and the Fallback are still tried.
 *
 * Each Choice is resolved exactly once: the first opaque Choice frame is
 * remembered during the initial scan instead of re-parsing every Choice in a
 * second pass.
 *
 * @param alt         The mc:AlternateContent element.
 * @param outerRawXml Raw XML of the enclosing drawing, forwarded to
 *                    resolveFromBranch unchanged.
 * @param opts        Parse options, forwarded to resolveFromBranch unchanged.
 * @returns The selected frame, or null when no branch yields one.
 */
function resolveFromAlternateContent(
  alt: XmlElementNode,
  outerRawXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode | null {
  // Kept for step 3: first supported Choice that resolved, but only to opaque.
  let firstOpaqueChoiceFrame: DrawingFrameNode | null = null;

  for (const child of alt.children) {
    // Skip text nodes, non-Choice elements, and Choices whose Requires=
    // lists something we don't know how to parse.
    if (
      child.type !== "element" ||
      localName(child.name) !== "Choice" ||
      !isChoiceSupported(child)
    ) {
      continue;
    }

    const frame = resolveFromBranch(child, outerRawXml, opts);
    if (!frame) continue;
    if (frame.content.type !== "opaque") return frame;
    if (firstOpaqueChoiceFrame === null) firstOpaqueChoiceFrame = frame;
  }

  // Phase 1.1 B1 — no Choice produced typed content; try Fallback.
  const fallback = findFirstChild(alt, "Fallback");
  const fallbackFrame = fallback ? resolveFromBranch(fallback, outerRawXml, opts) : null;
  if (fallbackFrame && fallbackFrame.content.type !== "opaque") return fallbackFrame;

  // Every branch is opaque (or empty): the first supported Choice wins over
  // Fallback, keeping declared-order precedence.
  return firstOpaqueChoiceFrame ?? fallbackFrame;
}
114
+
115
/**
 * Decide whether an mc:Choice branch is eligible for parsing.
 *
 * A Choice with a missing or blank Requires attribute is always eligible.
 * Otherwise the attribute is split on whitespace and every token must be a
 * member of SUPPORTED_REQUIRES_NAMESPACES.
 *
 * @param choice The mc:Choice element to inspect.
 * @returns true when the branch may be parsed, false when any token is unknown.
 */
function isChoiceSupported(choice: XmlElementNode): boolean {
  const requiresAttr = choice.attributes.Requires ?? "";
  const trimmed = requiresAttr.trim();
  if (!trimmed) return true;

  for (const token of trimmed.split(/\s+/)) {
    if (!SUPPORTED_REQUIRES_NAMESPACES.has(token)) return false;
  }
  return true;
}
47
120
 
121
/**
 * Build a DrawingFrameNode from one candidate subtree (the drawing root, an
 * mc:Choice, or an mc:Fallback).
 *
 * Returns null when the subtree has no anchor/inline container, when its
 * graphicData URI is not recognised, or when it is a WPS WordArt drawing —
 * in the latter two cases the legacy parse chain is expected to handle it.
 *
 * @param branch      Subtree to search.
 * @param outerRawXml Raw XML of the enclosing drawing, passed to resolveContent.
 * @param opts        Parse options, passed to resolveContent.
 */
function resolveFromBranch(
  branch: XmlElementNode,
  outerRawXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode | null {
  // An anchor container is preferred over an inline one when both exist.
  const anchorEl = findFirstDescendant(branch, "anchor");
  const inlineEl = findFirstDescendant(branch, "inline");
  const container = anchorEl ?? inlineEl;
  if (!container) {
    return null;
  }

  const anchorGeometry: AnchorGeometry = parseAnchorGeometry(container);

  // The graphicData uri attribute determines the content type.
  const graphicData = findFirstDescendant(branch, "graphicData");
  const uri = graphicData?.attributes.uri ?? "";

  // F3.5: an unrecognised graphicData URI is left to the legacy parse chain.
  if (!isKnownUri(uri)) {
    return null;
  }

  // WordArt stays on the dedicated legacy `wordart` node path; only
  // non-WordArt WPS drawings are promoted into DrawingFrame shape content.
  const isWpsWordArt = uri === WPS_SHAPE_GRAPHIC_URI && isWordArtGraphicData(graphicData);
  if (isWpsWordArt) {
    return null;
  }

  const content = resolveContent(uri, graphicData, outerRawXml, opts);
  return { type: "drawing_frame", anchor: anchorGeometry, content };
}
73
148
 
@@ -91,34 +166,6 @@ function isWordArtGraphicData(graphicData: XmlElementNode | undefined): boolean
91
166
  return /^text/i.test(geometry);
92
167
  }
93
168
 
94
- /**
95
- * If the drawing is wrapped in mc:AlternateContent, return the chosen branch
96
- * subtree. Prefer Choice (richer content); use Fallback only when Choice
97
- * contains no graphicData we can parse.
98
- */
99
- function pickAlternateContentBranch(root: XmlElementNode): XmlElementNode {
100
- const alt = findFirstDescendant(root, "AlternateContent");
101
- if (!alt) return root;
102
-
103
- const choice = findFirstChild(alt, "Choice");
104
- const fallback = findFirstChild(alt, "Fallback");
105
-
106
- if (choice) {
107
- const choiceGraphicData = findFirstDescendant(choice, "graphicData");
108
- const choiceUri = choiceGraphicData?.attributes.uri ?? "";
109
- if (isKnownUri(choiceUri)) return choice;
110
- }
111
-
112
- if (fallback) {
113
- const fallbackGraphicData = findFirstDescendant(fallback, "graphicData");
114
- const fallbackUri = fallbackGraphicData?.attributes.uri ?? "";
115
- if (isKnownUri(fallbackUri)) return fallback;
116
- }
117
-
118
- // Neither branch has a known URI — return Choice (if any) so caller can emit
119
- // opaque preservation; otherwise the original root.
120
- return choice ?? fallback ?? root;
121
- }
122
169
 
123
170
  function resolveContent(
124
171
  uri: string,
@@ -128,12 +175,16 @@ function resolveContent(
128
175
  ): DrawingFrameNode["content"] {
129
176
  if (uri === PICTURE_GRAPHIC_URI) {
130
177
  if (graphicData) {
131
- const pic = parsePicture(graphicData as PictureXmlElement);
178
+ const pic = parsePicture(graphicData);
132
179
  if (pic) {
133
- // Resolve mediaId from the relationship map
180
+ // Resolve mediaId from the relationship map. For embedded blips
181
+ // (r:embed) the relationship target is a package-local media part,
182
+ // so we normalize + register a mediaId. For linked blips (G4 / r:link)
183
+ // the relationship target is an external URL — we skip mediaId
184
+ // (leaving it undefined so surface-projection flags state: "missing").
134
185
  const relMap = new Map(opts.relationships.map((r) => [r.id, r]));
135
186
  const rel = relMap.get(pic.blipRef);
136
- if (rel?.type.endsWith("/image")) {
187
+ if (!pic.isLinked && rel?.type.endsWith("/image")) {
137
188
  const partPath = normalizePartPath(
138
189
  resolveRelationshipTarget(opts.sourcePartPath ?? "/word/document.xml", rel),
139
190
  );
@@ -156,7 +207,7 @@ function resolveContent(
156
207
  if (uri === WPS_SHAPE_GRAPHIC_URI) {
157
208
  if (graphicData) {
158
209
  const shape = parseShapeContent(
159
- graphicData as PictureXmlElement,
210
+ graphicData,
160
211
  rawXml,
161
212
  opts.blockParser,
162
213
  );
@@ -168,152 +219,6 @@ function resolveContent(
168
219
  return { type: "opaque", rawXml };
169
220
  }
170
221
 
171
- // ── Minimal self-contained XML parser ──────────────────────────────────────
172
-
173
- interface XmlElementNode {
174
- type: "element";
175
- name: string;
176
- attributes: Record<string, string>;
177
- children: XmlNode[];
178
- }
179
-
180
- interface XmlTextNode {
181
- type: "text";
182
- text: string;
183
- }
184
-
185
- type XmlNode = XmlElementNode | XmlTextNode;
186
-
187
- function findFirstChild(
188
- node: XmlElementNode,
189
- local: string,
190
- ): XmlElementNode | undefined {
191
- for (const child of node.children) {
192
- if (child.type === "element" && localName(child.name) === local) return child;
193
- }
194
- return undefined;
195
- }
196
-
197
- function findFirstDescendant(
198
- node: XmlElementNode,
199
- local: string,
200
- ): XmlElementNode | undefined {
201
- for (const child of node.children) {
202
- if (child.type !== "element") continue;
203
- if (localName(child.name) === local) return child;
204
- const found = findFirstDescendant(child, local);
205
- if (found) return found;
206
- }
207
- return undefined;
208
- }
209
-
210
- function localName(name: string): string {
211
- const i = name.indexOf(":");
212
- return i >= 0 ? name.slice(i + 1) : name;
213
- }
214
-
215
- function parseXml(xml: string): XmlElementNode {
216
- const root: XmlElementNode = {
217
- type: "element",
218
- name: "__root__",
219
- attributes: {},
220
- children: [],
221
- };
222
- const stack: XmlElementNode[] = [root];
223
- let cursor = 0;
224
-
225
- while (cursor < xml.length) {
226
- if (xml.startsWith("<!--", cursor)) {
227
- const end = xml.indexOf("-->", cursor);
228
- cursor = end >= 0 ? end + 3 : xml.length;
229
- continue;
230
- }
231
- if (xml.startsWith("<?", cursor)) {
232
- const end = xml.indexOf("?>", cursor);
233
- cursor = end >= 0 ? end + 2 : xml.length;
234
- continue;
235
- }
236
- if (xml[cursor] !== "<") {
237
- const nextTag = xml.indexOf("<", cursor);
238
- const end = nextTag >= 0 ? nextTag : xml.length;
239
- const text = decodeEntities(xml.slice(cursor, end));
240
- if (text) stack[stack.length - 1]?.children.push({ type: "text", text });
241
- cursor = end;
242
- continue;
243
- }
244
- if (xml[cursor + 1] === "/") {
245
- const end = xml.indexOf(">", cursor);
246
- stack.pop();
247
- cursor = end + 1;
248
- continue;
249
- }
250
- const tagEnd = findTagEnd(xml, cursor);
251
- const tagBody = xml.slice(cursor + 1, tagEnd);
252
- const selfClosing = /\/\s*$/.test(tagBody);
253
- const { name, attributes } = parseTag(tagBody.replace(/\/\s*$/, "").trim());
254
- const el: XmlElementNode = { type: "element", name, attributes, children: [] };
255
- stack[stack.length - 1]?.children.push(el);
256
- if (!selfClosing) stack.push(el);
257
- cursor = tagEnd + 1;
258
- }
259
-
260
- return root;
261
- }
262
-
263
- function findTagEnd(xml: string, start: number): number {
264
- let cursor = start + 1;
265
- let quote: string | null = null;
266
- while (cursor < xml.length) {
267
- const ch = xml[cursor];
268
- if (quote) {
269
- if (ch === quote) quote = null;
270
- } else if (ch === `"` || ch === `'`) {
271
- quote = ch;
272
- } else if (ch === ">") {
273
- return cursor;
274
- }
275
- cursor++;
276
- }
277
- return xml.length - 1;
278
- }
279
-
280
- function parseTag(body: string): { name: string; attributes: Record<string, string> } {
281
- let i = 0;
282
- while (i < body.length && /\s/.test(body[i] ?? "")) i++;
283
- const nameStart = i;
284
- while (i < body.length && !/\s/.test(body[i] ?? "")) i++;
285
- const name = body.slice(nameStart, i);
286
- const attributes: Record<string, string> = {};
287
- while (i < body.length) {
288
- while (i < body.length && /\s/.test(body[i] ?? "")) i++;
289
- if (i >= body.length) break;
290
- const kStart = i;
291
- while (i < body.length && !/[\s=]/.test(body[i] ?? "")) i++;
292
- const key = body.slice(kStart, i);
293
- while (i < body.length && /\s/.test(body[i] ?? "")) i++;
294
- if (body[i] !== "=") { attributes[key] = ""; continue; }
295
- i++;
296
- while (i < body.length && /\s/.test(body[i] ?? "")) i++;
297
- const q = body[i];
298
- if (q !== `"` && q !== `'`) throw new Error(`Bad attr ${key}`);
299
- i++;
300
- const vStart = i;
301
- while (i < body.length && body[i] !== q) i++;
302
- attributes[key] = decodeEntities(body.slice(vStart, i));
303
- i++;
304
- }
305
- return { name, attributes };
306
- }
307
-
308
- function decodeEntities(s: string): string {
309
- return s.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (_, e) => {
310
- if (e === "amp") return "&";
311
- if (e === "lt") return "<";
312
- if (e === "gt") return ">";
313
- if (e === "quot") return `"`;
314
- if (e === "apos") return "'";
315
- if (e.startsWith("#x")) return String.fromCodePoint(parseInt(e.slice(2), 16));
316
- if (e.startsWith("#")) return String.fromCodePoint(parseInt(e.slice(1), 10));
317
- return `&${e};`;
318
- });
319
- }
222
+ // Phase 6 XML parser helpers imported from ./_mini-xml.ts (previously
223
+ // duplicated inline across four files). See that module for B4 throw-on-
224
+ // unterminated-tag contract and entity-decoding implementation.
@@ -0,0 +1,93 @@
1
+ import type { LegacyFormFieldNode } from "../../model/canonical-document.ts";
2
+ import type { XmlElementNode } from "./xml-element.ts";
3
+ import {
4
+ findChildOptional,
5
+ localName,
6
+ readIntAttr,
7
+ readOnOff,
8
+ readStringAttr,
9
+ } from "./xml-attr-helpers.ts";
10
+
11
/**
 * Build a LegacyFormFieldNode from the <w:ffData> child of a
 * <w:fldChar w:fldCharType="begin"> element.
 *
 * Only the textInput, checkBox, and ddList kinds are modelled. The ffData
 * element's source text is always carried along in `rawXml` so the element
 * can be round-tripped verbatim.
 *
 * @param fldCharEl The fldChar element to inspect.
 * @param sourceXml The XML string that `fldCharEl`'s start/end offsets index into.
 * @returns The parsed node, or undefined when there is no <w:ffData> child
 *          (most fields don't have one).
 */
export function parseFFDataFromFldChar(
  fldCharEl: XmlElementNode,
  sourceXml: string,
): LegacyFormFieldNode | undefined {
  const ffDataEl = findChildOptional(fldCharEl, "ffData");
  if (!ffDataEl) return undefined;

  const rawXml = sourceXml.slice(ffDataEl.start, ffDataEl.end);
  const base = readCommonFFDataProps(ffDataEl);

  const textInputEl = findChildOptional(ffDataEl, "textInput");
  if (textInputEl) {
    return { kind: "textInput", ...base, textInput: readTextInputProps(textInputEl), rawXml };
  }

  const checkBoxEl = findChildOptional(ffDataEl, "checkBox");
  if (checkBoxEl) {
    return { kind: "checkBox", ...base, checkBox: readCheckBoxProps(checkBoxEl), rawXml };
  }

  const ddListEl = findChildOptional(ffDataEl, "ddList");
  if (ddListEl) {
    return { kind: "ddList", ...base, ddList: readDdListProps(ddListEl), rawXml };
  }

  // Unknown or empty ffData — preserve via rawXml passthrough
  return { kind: "textInput", ...base, rawXml };
}

/** Read name / enabled / calcOnExit; a key is emitted only when its value is defined. */
function readCommonFFDataProps(
  ffDataEl: XmlElementNode,
): Pick<LegacyFormFieldNode, "name" | "enabled" | "calcOnExit"> {
  const nameEl = findChildOptional(ffDataEl, "name");
  const name = nameEl ? readStringAttr(nameEl, "w:val") : undefined;

  const enabledEl = findChildOptional(ffDataEl, "enabled");
  const enabled = enabledEl !== undefined ? readOnOff(enabledEl) : undefined;

  const calcOnExitEl = findChildOptional(ffDataEl, "calcOnExit");
  const calcOnExit = calcOnExitEl !== undefined ? readOnOff(calcOnExitEl) : undefined;

  return {
    ...(name !== undefined ? { name } : {}),
    ...(enabled !== undefined ? { enabled } : {}),
    ...(calcOnExit !== undefined ? { calcOnExit } : {}),
  };
}

/** Read default / maxLength / format; a key is emitted whenever its child element exists. */
function readTextInputProps(textInputEl: XmlElementNode) {
  const defaultEl = findChildOptional(textInputEl, "default");
  const maxLengthEl = findChildOptional(textInputEl, "maxLength");
  const formatEl = findChildOptional(textInputEl, "format");
  return {
    ...(defaultEl !== undefined ? { default: readStringAttr(defaultEl, "w:val") } : {}),
    ...(maxLengthEl !== undefined ? { maxLength: readIntAttr(maxLengthEl, "w:val") } : {}),
    ...(formatEl !== undefined ? { format: readStringAttr(formatEl, "w:val") } : {}),
  };
}

/** Read size / default / checked; `default` falls back to false when readOnOff yields nothing. */
function readCheckBoxProps(checkBoxEl: XmlElementNode) {
  const sizeEl = findChildOptional(checkBoxEl, "size");
  const defaultEl = findChildOptional(checkBoxEl, "default");
  const checkedEl = findChildOptional(checkBoxEl, "checked");
  return {
    ...(sizeEl !== undefined ? { size: readIntAttr(sizeEl, "w:val") } : {}),
    ...(defaultEl !== undefined ? { default: readOnOff(defaultEl) ?? false } : {}),
    ...(checkedEl !== undefined ? { checked: readOnOff(checkedEl) } : {}),
  };
}

/** Read the default index and every listEntry value (a missing w:val becomes ""). */
function readDdListProps(ddListEl: XmlElementNode) {
  const defaultEl = findChildOptional(ddListEl, "default");
  const listEntries: string[] = [];
  for (const child of ddListEl.children) {
    if (child.type === "element" && localName(child.name) === "listEntry") {
      listEntries.push(readStringAttr(child, "w:val") ?? "");
    }
  }
  return {
    ...(defaultEl !== undefined ? { default: readIntAttr(defaultEl, "w:val") } : {}),
    listEntry: listEntries,
  };
}
@@ -53,6 +53,9 @@ export interface ParsedBookmarkEndNode {
53
53
 
54
54
  export type ParsedBookmarkNode = ParsedBookmarkStartNode | ParsedBookmarkEndNode;
55
55
 
56
+ import { parseXmlWithOffsets } from "./xml-parser.ts";
57
+ import { localName } from "./xml-attr-helpers.ts";
58
+
56
59
  // ─── Internal XML types ───────────────────────────────────────────────────────
57
60
 
58
61
  interface XmlElementNode {
@@ -86,7 +89,7 @@ export function parseFieldsFromParagraphXml(paragraphXml: string): {
86
89
  simpleFields: ParsedSimpleFieldNode[];
87
90
  bookmarks: ParsedBookmarkNode[];
88
91
  } {
89
- const root = parseXml(paragraphXml);
92
+ const root = parseXmlWithOffsets(paragraphXml) as XmlElementNode;
90
93
  const pEl = findFirstChild(root, "p");
91
94
  const target = pEl ?? root;
92
95
 
@@ -120,7 +123,7 @@ export function parseFieldsFromParagraphXml(paragraphXml: string): {
120
123
  * @param bodyXml Raw XML string of a <w:body> element (or full document).
121
124
  */
122
125
  export function extractComplexFieldsFromBodyXml(bodyXml: string): ParsedComplexFieldNode[] {
123
- const root = parseXml(bodyXml);
126
+ const root = parseXmlWithOffsets(bodyXml) as XmlElementNode;
124
127
  const results: ParsedComplexFieldNode[] = [];
125
128
 
126
129
  // Walk all <w:p> children of the body
@@ -140,7 +143,7 @@ export function extractComplexFieldsFromBodyXml(bodyXml: string): ParsedComplexF
140
143
  * Parse all bookmark start/end nodes from a full document or body XML.
141
144
  */
142
145
  export function extractBookmarksFromBodyXml(bodyXml: string): ParsedBookmarkNode[] {
143
- const root = parseXml(bodyXml);
146
+ const root = parseXmlWithOffsets(bodyXml) as XmlElementNode;
144
147
  const results: ParsedBookmarkNode[] = [];
145
148
  collectBookmarks(root, bodyXml, results);
146
149
  return results;
@@ -174,7 +177,7 @@ export interface FieldGroup {
174
177
  * Fail-closed: a begin with no matching end is silently dropped.
175
178
  */
176
179
  export function streamWalkFieldGroups(bodyXml: string): FieldGroup[] {
177
- const root = parseXml(bodyXml);
180
+ const root = parseXmlWithOffsets(bodyXml) as XmlElementNode;
178
181
  const bodyEl =
179
182
  findFirstChild(root, "body") ??
180
183
  findFirstChild(root, "document") ??
@@ -836,145 +839,3 @@ function findFirstChildEl(node: XmlElementNode, childLocalName: string): XmlElem
836
839
  );
837
840
  }
838
841
 
839
- function localName(name: string): string {
840
- const sep = name.indexOf(":");
841
- return sep >= 0 ? name.slice(sep + 1) : name;
842
- }
843
-
844
- // ─── Minimal XML parser (same pattern as parse-tables.ts) ────────────────────
845
-
846
- function parseXml(xml: string): XmlElementNode {
847
- const root: XmlElementNode = {
848
- type: "element",
849
- name: "__root__",
850
- attributes: {},
851
- children: [],
852
- start: 0,
853
- end: xml.length,
854
- };
855
- const stack: XmlElementNode[] = [root];
856
- let cursor = 0;
857
-
858
- while (cursor < xml.length) {
859
- if (xml.startsWith("<!--", cursor)) {
860
- const end = xml.indexOf("-->", cursor);
861
- cursor = end >= 0 ? end + 3 : xml.length;
862
- continue;
863
- }
864
- if (xml.startsWith("<?", cursor)) {
865
- const end = xml.indexOf("?>", cursor);
866
- cursor = end >= 0 ? end + 2 : xml.length;
867
- continue;
868
- }
869
- if (xml.startsWith("<![CDATA[", cursor)) {
870
- const end = xml.indexOf("]]>", cursor);
871
- const textEnd = end >= 0 ? end : xml.length;
872
- stack[stack.length - 1]?.children.push({
873
- type: "text",
874
- text: xml.slice(cursor + 9, textEnd),
875
- start: cursor,
876
- end: end >= 0 ? end + 3 : xml.length,
877
- });
878
- cursor = end >= 0 ? end + 3 : xml.length;
879
- continue;
880
- }
881
- if (xml[cursor] !== "<") {
882
- const nextTag = xml.indexOf("<", cursor);
883
- const end = nextTag >= 0 ? nextTag : xml.length;
884
- const text = decodeXmlEntities(xml.slice(cursor, end));
885
- if (text.length > 0) {
886
- stack[stack.length - 1]?.children.push({ type: "text", text, start: cursor, end });
887
- }
888
- cursor = end;
889
- continue;
890
- }
891
- if (xml[cursor + 1] === "/") {
892
- const end = xml.indexOf(">", cursor);
893
- if (end < 0) throw new Error("Malformed XML: missing >.");
894
- const name = xml.slice(cursor + 2, end).trim();
895
- const current = stack.pop();
896
- if (!current || localName(current.name) !== localName(name)) {
897
- throw new Error(`Malformed XML: unexpected closing tag </${name}>.`);
898
- }
899
- current.end = end + 1;
900
- cursor = end + 1;
901
- continue;
902
- }
903
- const tagEnd = findTagEnd(xml, cursor);
904
- const tagBody = xml.slice(cursor + 1, tagEnd);
905
- const selfClosing = /\/\s*$/.test(tagBody);
906
- const { name, attributes } = parseTag(tagBody.replace(/\/\s*$/, "").trim());
907
- const element: XmlElementNode = {
908
- type: "element",
909
- name,
910
- attributes,
911
- children: [],
912
- start: cursor,
913
- end: tagEnd + 1,
914
- };
915
- stack[stack.length - 1]?.children.push(element);
916
- if (!selfClosing) stack.push(element);
917
- cursor = tagEnd + 1;
918
- }
919
-
920
- if (stack.length !== 1) throw new Error("Malformed XML: unclosed element.");
921
- return root;
922
- }
923
-
924
- function parseTag(tagBody: string): { name: string; attributes: Record<string, string> } {
925
- let cursor = 0;
926
- while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
927
- const nameStart = cursor;
928
- while (cursor < tagBody.length && !/\s/.test(tagBody[cursor] ?? "")) cursor += 1;
929
- const name = tagBody.slice(nameStart, cursor);
930
- const attributes: Record<string, string> = {};
931
-
932
- while (cursor < tagBody.length) {
933
- while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
934
- if (cursor >= tagBody.length) break;
935
- const keyStart = cursor;
936
- while (cursor < tagBody.length && !/[\s=]/.test(tagBody[cursor] ?? "")) cursor += 1;
937
- const key = tagBody.slice(keyStart, cursor);
938
- while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
939
- if (tagBody[cursor] !== "=") { attributes[key] = ""; continue; }
940
- cursor += 1;
941
- while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
942
- const quote = tagBody[cursor];
943
- if (quote !== `"` && quote !== `'`) throw new Error(`Malformed XML attribute ${key}.`);
944
- cursor += 1;
945
- const valueStart = cursor;
946
- while (cursor < tagBody.length && tagBody[cursor] !== quote) cursor += 1;
947
- attributes[key] = decodeXmlEntities(tagBody.slice(valueStart, cursor));
948
- cursor += 1;
949
- }
950
- return { name, attributes };
951
- }
952
-
953
- function findTagEnd(xml: string, start: number): number {
954
- let cursor = start + 1;
955
- let quote: string | null = null;
956
- while (cursor < xml.length) {
957
- const c = xml[cursor];
958
- if (quote) { if (c === quote) quote = null; cursor += 1; continue; }
959
- if (c === `"` || c === `'`) { quote = c; cursor += 1; continue; }
960
- if (c === ">") return cursor;
961
- cursor += 1;
962
- }
963
- throw new Error("Malformed XML: missing >.");
964
- }
965
-
966
- function decodeXmlEntities(value: string): string {
967
- return value.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (match, entity) => {
968
- switch (entity) {
969
- case "amp": return "&";
970
- case "lt": return "<";
971
- case "gt": return ">";
972
- case "quot": return `"`;
973
- case "apos": return `'`;
974
- default:
975
- if (entity.startsWith("#x")) return String.fromCodePoint(Number.parseInt(entity.slice(2), 16));
976
- if (entity.startsWith("#")) return String.fromCodePoint(Number.parseInt(entity.slice(1), 10));
977
- return match;
978
- }
979
- });
980
- }