@beyondwork/docx-react-component 1.0.47 → 1.0.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80):
  1. package/README.md +16 -11
  2. package/package.json +30 -41
  3. package/src/api/public-types.ts +199 -13
  4. package/src/compare/diff-engine.ts +4 -0
  5. package/src/core/commands/add-scope.ts +257 -0
  6. package/src/core/commands/formatting-commands.ts +2 -0
  7. package/src/core/commands/index.ts +9 -1
  8. package/src/core/commands/text-commands.ts +3 -1
  9. package/src/core/schema/text-schema.ts +95 -1
  10. package/src/core/selection/anchor-conversion.ts +112 -0
  11. package/src/core/selection/review-anchors.ts +108 -3
  12. package/src/core/state/text-transaction.ts +103 -7
  13. package/src/internal/harness-debug-ports.ts +168 -0
  14. package/src/io/chart-preview-resolver.ts +59 -1
  15. package/src/io/docx-session.ts +226 -38
  16. package/src/io/export/serialize-main-document.ts +46 -0
  17. package/src/io/export/serialize-paragraph-formatting.ts +8 -0
  18. package/src/io/export/serialize-run-formatting.ts +10 -1
  19. package/src/io/export/serialize-settings.ts +421 -0
  20. package/src/io/export/serialize-styles.ts +10 -0
  21. package/src/io/normalize/normalize-text.ts +1 -0
  22. package/src/io/ooxml/chart/chart-style-table.ts +543 -0
  23. package/src/io/ooxml/chart/color-palette.ts +101 -0
  24. package/src/io/ooxml/chart/compose-series-color.ts +147 -0
  25. package/src/io/ooxml/chart/parse-axis.ts +277 -0
  26. package/src/io/ooxml/chart/parse-chart-space.ts +885 -0
  27. package/src/io/ooxml/chart/parse-series.ts +635 -0
  28. package/src/io/ooxml/chart/resolve-color.ts +261 -0
  29. package/src/io/ooxml/chart/types.ts +439 -0
  30. package/src/io/ooxml/parse-block-structure.ts +99 -0
  31. package/src/io/ooxml/parse-complex-content.ts +90 -2
  32. package/src/io/ooxml/parse-main-document.ts +156 -1
  33. package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
  34. package/src/io/ooxml/parse-run-formatting.ts +49 -0
  35. package/src/io/ooxml/parse-scope-markers.ts +184 -0
  36. package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
  37. package/src/io/ooxml/parse-settings.ts +97 -1
  38. package/src/io/ooxml/parse-styles.ts +65 -0
  39. package/src/io/ooxml/parse-theme.ts +2 -127
  40. package/src/io/ooxml/property-grab-bag.ts +211 -0
  41. package/src/io/ooxml/xml-attr-helpers.ts +59 -1
  42. package/src/io/ooxml/xml-parser.ts +142 -0
  43. package/src/model/canonical-document.ts +160 -0
  44. package/src/model/scope-markers.ts +144 -0
  45. package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
  46. package/src/runtime/collab/checkpoint-election.ts +75 -0
  47. package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
  48. package/src/runtime/collab/checkpoint-store.ts +115 -0
  49. package/src/runtime/collab/event-types.ts +27 -0
  50. package/src/runtime/collab/index.ts +29 -0
  51. package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
  52. package/src/runtime/collab/runtime-collab-sync.ts +330 -0
  53. package/src/runtime/collab/workflow-shared.ts +247 -0
  54. package/src/runtime/document-locations.ts +1 -9
  55. package/src/runtime/document-outline.ts +1 -9
  56. package/src/runtime/document-runtime.ts +288 -65
  57. package/src/runtime/editor-surface/capabilities.ts +63 -50
  58. package/src/runtime/hyperlink-color-resolver.ts +119 -0
  59. package/src/runtime/layout/layout-engine-version.ts +8 -1
  60. package/src/runtime/prerender/cache-envelope.ts +19 -7
  61. package/src/runtime/prerender/cache-key.ts +25 -14
  62. package/src/runtime/prerender/canonical-document-hash.ts +63 -0
  63. package/src/runtime/prerender/customxml-cache.ts +211 -0
  64. package/src/runtime/prerender/customxml-probe.ts +78 -0
  65. package/src/runtime/prerender/prerender-document.ts +74 -7
  66. package/src/runtime/scope-resolver.ts +148 -0
  67. package/src/runtime/scope-tag-registry.ts +10 -0
  68. package/src/runtime/surface-projection.ts +102 -37
  69. package/src/runtime/theme-color-resolver.ts +188 -0
  70. package/src/runtime/workflow-markup.ts +7 -18
  71. package/src/ui/WordReviewEditor.tsx +48 -2
  72. package/src/ui/editor-runtime-boundary.ts +42 -1
  73. package/src/ui/headless/selection-helpers.ts +10 -23
  74. package/src/ui/runtime-shortcut-dispatch.ts +12 -7
  75. package/src/ui/unsupported-previews-policy.ts +23 -0
  76. package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
  77. package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
  78. package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
  79. package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
  80. package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
@@ -153,6 +153,7 @@ export function parseStylesXml(xml: string): ParseStylesResult {
153
153
  switch (styleType) {
154
154
  case "paragraph": {
155
155
  const nextStyle = readLinkedStyleId(child, "next");
156
+ const linkedStyleId = readLinkedStyleId(child, "link");
156
157
  const outlineLevel = readParagraphStyleOutlineLevel(child);
157
158
  const numbering = readParagraphStyleNumbering(child);
158
159
  const pPrNode = findChildElementOptional(child, "pPr");
@@ -170,10 +171,12 @@ export function parseStylesXml(xml: string): ParseStylesResult {
170
171
  ...(numbering ? { numbering } : {}),
171
172
  ...(paragraphProperties ? { paragraphProperties } : {}),
172
173
  ...(runProperties ? { runProperties } : {}),
174
+ ...(linkedStyleId ? { linkedStyleId } : {}),
173
175
  };
174
176
  break;
175
177
  }
176
178
  case "character": {
179
+ const linkedStyleId = readLinkedStyleId(child, "link");
177
180
  const rPrNode = findChildElementOptional(child, "rPr");
178
181
  const runProperties = readRunProperties(rPrNode);
179
182
  characters[styleId] = {
@@ -183,6 +186,7 @@ export function parseStylesXml(xml: string): ParseStylesResult {
183
186
  isDefault,
184
187
  ...(basedOn ? { basedOn } : {}),
185
188
  ...(runProperties ? { runProperties } : {}),
189
+ ...(linkedStyleId ? { linkedStyleId } : {}),
186
190
  };
187
191
  break;
188
192
  }
@@ -209,6 +213,8 @@ export function parseStylesXml(xml: string): ParseStylesResult {
209
213
  }
210
214
  }
211
215
 
216
+ resolveStyleLinkReciprocals(paragraphs, characters, diagnostics);
217
+
212
218
  const hasLatent = Object.keys(latentStyles).length > 0;
213
219
  diagnostics.push(
214
220
  `parsed ${Object.keys(paragraphs).length} paragraph, ` +
@@ -250,6 +256,65 @@ function readLinkedStyleId(
250
256
  return el.attributes["w:val"] ?? el.attributes.val ?? undefined;
251
257
  }
252
258
 
259
/**
 * Second-pass resolver for `<w:link>` on paragraph ↔ character style pairs.
 *
 * Mirrors LibreOffice's StyleSheetTable.cxx around line 1533 ("Update the
 * styles that were created before their linked styles"): after every style
 * is ingested, walk both catalogs and synthesize the reciprocal
 * `linkedStyleId` on a partner that has none of its own. The paragraph
 * catalog is walked first, then the character catalog.
 *
 * Conflict handling is conservative: when the partner already carries a
 * `linkedStyleId` naming a different style, the partner is left untouched
 * and a diagnostic is appended.
 *
 * Dangling references (a link whose target is not an own key of the partner
 * catalog) are left on the declaring style and reported as a diagnostic.
 *
 * @param paragraphs  Paragraph style catalog keyed by style id; entries may
 *                    be mutated to receive a synthesized `linkedStyleId`.
 * @param characters  Character style catalog keyed by style id; entries may
 *                    be mutated to receive a synthesized `linkedStyleId`.
 * @param diagnostics Sink that receives one message per dangling or
 *                    conflicting link.
 */
function resolveStyleLinkReciprocals(
  paragraphs: Record<string, ParagraphStyleDefinition>,
  characters: Record<string, CharacterStyleDefinition>,
  diagnostics: string[],
): void {
  const walkers: Array<{
    catalog:
      | Record<string, ParagraphStyleDefinition>
      | Record<string, CharacterStyleDefinition>;
    partnerCatalog:
      | Record<string, ParagraphStyleDefinition>
      | Record<string, CharacterStyleDefinition>;
    label: string;
  }> = [
    { catalog: paragraphs, partnerCatalog: characters, label: "paragraph" },
    { catalog: characters, partnerCatalog: paragraphs, label: "character" },
  ];

  for (const { catalog, partnerCatalog, label } of walkers) {
    for (const style of Object.values(catalog)) {
      const target = style.linkedStyleId;
      if (!target) continue;
      // Own-property lookup only. The link target comes straight from the
      // document, so a value such as "constructor" or "toString" must not
      // resolve to an inherited Object.prototype member and be mistaken for
      // (and mutated as) a partner style.
      const partner = Object.prototype.hasOwnProperty.call(partnerCatalog, target)
        ? partnerCatalog[target]
        : undefined;
      if (!partner) {
        diagnostics.push(
          `style ${label} "${style.styleId}" declares <w:link w:val="${target}"/> but no matching ${label === "paragraph" ? "character" : "paragraph"} style was found; link preserved as dangling`,
        );
        continue;
      }
      if (partner.linkedStyleId === undefined) {
        // Partner declared no link of its own: synthesize the reciprocal.
        partner.linkedStyleId = style.styleId;
      } else if (partner.linkedStyleId !== style.styleId) {
        diagnostics.push(
          `style ${label} "${style.styleId}" links to "${target}" but partner already links to "${partner.linkedStyleId}"; partner link retained`,
        );
      }
    }
  }
}
317
+
253
318
  function readParagraphStyleOutlineLevel(
254
319
  styleNode: XmlElementNode,
255
320
  ): number | undefined {
@@ -4,22 +4,8 @@ import type {
4
4
  ThemeFontScheme,
5
5
  ResolvedTheme,
6
6
  } from "../../model/canonical-document.ts";
7
-
8
- // ---- XML node types (inline, no external dep) ----
9
-
10
- interface XmlElementNode {
11
- type: "element";
12
- name: string;
13
- attributes: Record<string, string>;
14
- children: XmlNode[];
15
- }
16
-
17
- interface XmlTextNode {
18
- type: "text";
19
- text: string;
20
- }
21
-
22
- type XmlNode = XmlElementNode | XmlTextNode;
7
+ import type { XmlElementNode } from "./xml-element.ts";
8
+ import { parseXml } from "./xml-parser.ts";
23
9
 
24
10
  // ---- Well-known DrawingML color slot names ----
25
11
 
@@ -233,114 +219,3 @@ function localName(name: string): string {
233
219
  return idx >= 0 ? name.slice(idx + 1) : name;
234
220
  }
235
221
 
236
- // ---- Minimal XML parser ----
237
-
238
- function parseXml(xml: string): XmlElementNode {
239
- const root: XmlElementNode = {
240
- type: "element",
241
- name: "__root__",
242
- attributes: {},
243
- children: [],
244
- };
245
- const stack: XmlElementNode[] = [root];
246
- let cursor = 0;
247
-
248
- while (cursor < xml.length) {
249
- if (xml.startsWith("<!--", cursor)) {
250
- const end = xml.indexOf("-->", cursor);
251
- cursor = end >= 0 ? end + 3 : xml.length;
252
- continue;
253
- }
254
-
255
- if (xml.startsWith("<?", cursor)) {
256
- const end = xml.indexOf("?>", cursor);
257
- cursor = end >= 0 ? end + 2 : xml.length;
258
- continue;
259
- }
260
-
261
- if (xml.startsWith("<![CDATA[", cursor)) {
262
- const end = xml.indexOf("]]>", cursor);
263
- const textEnd = end >= 0 ? end : xml.length;
264
- stack[stack.length - 1]?.children.push({
265
- type: "text",
266
- text: xml.slice(cursor + 9, textEnd),
267
- });
268
- cursor = end >= 0 ? end + 3 : xml.length;
269
- continue;
270
- }
271
-
272
- if (xml[cursor] !== "<") {
273
- const nextTag = xml.indexOf("<", cursor);
274
- const end = nextTag >= 0 ? nextTag : xml.length;
275
- const text = decodeXmlEntities(xml.slice(cursor, end));
276
- if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
277
- stack[stack.length - 1]?.children.push({ type: "text", text });
278
- }
279
- cursor = end;
280
- continue;
281
- }
282
-
283
- if (xml[cursor + 1] === "/") {
284
- const end = xml.indexOf(">", cursor);
285
- if (end < 0) break;
286
- stack.pop();
287
- cursor = end + 1;
288
- continue;
289
- }
290
-
291
- const tagEnd = xml.indexOf(">", cursor);
292
- if (tagEnd < 0) break;
293
-
294
- const tagContent = xml.slice(cursor + 1, tagEnd);
295
- const selfClosing = tagContent.endsWith("/");
296
- const normalized = selfClosing ? tagContent.slice(0, -1).trimEnd() : tagContent;
297
-
298
- const spaceIndex = normalized.search(/\s/);
299
- const tagName = spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
300
- const attrString = spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
301
- const attributes = parseAttributes(attrString);
302
-
303
- const element: XmlElementNode = {
304
- type: "element",
305
- name: tagName,
306
- attributes,
307
- children: [],
308
- };
309
-
310
- stack[stack.length - 1]?.children.push(element);
311
-
312
- if (!selfClosing) {
313
- stack.push(element);
314
- }
315
-
316
- cursor = tagEnd + 1;
317
- }
318
-
319
- return root;
320
- }
321
-
322
- function parseAttributes(attrString: string): Record<string, string> {
323
- const attrs: Record<string, string> = {};
324
- const pattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
325
- for (const match of attrString.matchAll(pattern)) {
326
- const name = match[1];
327
- const value = match[3] ?? match[4] ?? "";
328
- if (name) {
329
- attrs[name] = decodeXmlEntities(value);
330
- }
331
- }
332
- return attrs;
333
- }
334
-
335
- function decodeXmlEntities(text: string): string {
336
- return text
337
- .replace(/&amp;/g, "&")
338
- .replace(/&lt;/g, "<")
339
- .replace(/&gt;/g, ">")
340
- .replace(/&quot;/g, '"')
341
- .replace(/&apos;/g, "'")
342
- .replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number.parseInt(dec, 10)))
343
- .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) =>
344
- String.fromCodePoint(Number.parseInt(hex, 16)),
345
- );
346
- }
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Property-level grab-bag primitive for Lane 3 O2.
3
+ *
4
+ * LibreOffice captures unmodelled children / attributes on every OOXML
5
+ * property container (`<w:pPr>`, `<w:rPr>`, `<w:tcPr>`, `<w:trPr>`,
6
+ * `<w:tblPr>`, `<w:sectPr>`) via per-container "grab bags" keyed by
7
+ * element name — see `PropertyMap.hxx:82` and
8
+ * `libreoffice-analysis.md` §2 for the mechanism. On export, every grab
9
+ * bag re-emits verbatim inside its container so the round-trip pipeline
10
+ * does not silently drop extension-namespace properties (`w15:collapsed`,
11
+ * `w16cex:...`, etc.) or attributes Word adds after we parsed its schema.
12
+ *
13
+ * This module is a small, framework-free adapter: per-container parsers
14
+ * supply a descriptor listing modelled child names (and later, modelled
15
+ * attributes on modelled children); the helper returns everything else as
16
+ * raw XML in insertion order. The matching emitter is a one-liner that
17
+ * just joins `rawXml` strings.
18
+ *
19
+ * Scope (O2 Slice 1): per-container child diff only — the unknown-attribute
20
+ * diff on modelled children is a follow-up slice. Today the descriptor's
21
+ * `modelledChildAttributes` is declared but ignored; the helper emits a
22
+ * whole-child entry only when the element's localName is NOT in
23
+ * `modelledChildNames`.
24
+ */
25
+
26
/**
 * One direct child of a property container, in the shape
 * `capturePropertyGrabBag` consumes. Kept deliberately small so that each
 * per-file parser under `src/io/ooxml/` can adapt its own node shape to it.
 */
export interface GrabBagSourceChild {
  /**
   * Element name with the namespace prefix removed, e.g. `"kinsoku"` for
   * `<w:kinsoku>` or `"collapsed"` for `<w15:collapsed>`.
   */
  localName: string;
  /**
   * XML text of the whole child element: opening tag with attributes, any
   * nested content, and the closing tag (or the self-closing form). This
   * string is what gets re-emitted on export.
   */
  rawXml: string;
}
44
+
45
/**
 * Adapter for callers that only hold a parsed element tree (the
 * `XmlElementNode` shape from `src/io/ooxml/xml-element.ts`) rather than
 * source text. It derives `localName` from the qualified element name and
 * rebuilds `rawXml` by re-serializing the tree.
 *
 * The rebuilt XML keeps element names, attribute names/values and the
 * attribute iteration order of the `attributes` record, but it is not
 * byte-identical to the source: attribute values are always double-quoted
 * and childless elements are always emitted self-closing.
 *
 * @param node Parsed element to wrap.
 * @returns A `GrabBagSourceChild` describing `node`.
 */
export function buildGrabBagSourceChildFromParsed(node: {
  name: string;
  attributes: Record<string, string>;
  children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
}): GrabBagSourceChild {
  const localName = localNameOf(node.name);
  const rawXml = serializeElementToString(node);
  return { localName, rawXml };
}
69
+
70
/**
 * Strip the namespace prefix from a qualified XML name.
 *
 * Everything up to and including the first `:` is removed; a name without
 * a colon is returned unchanged.
 */
function localNameOf(qualified: string): string {
  const prefixEnd = qualified.indexOf(":");
  if (prefixEnd === -1) {
    return qualified;
  }
  return qualified.slice(prefixEnd + 1);
}
74
+
75
/**
 * Escape a string for use inside a double-quoted XML attribute value.
 *
 * Besides the markup characters `&`, `<`, `>` and `"`, literal TAB, LF and
 * CR are written as numeric character references. XML attribute-value
 * normalization turns literal whitespace characters into plain spaces when
 * the document is read back, whereas character references survive, so this
 * keeps values that were decoded from `&#9;` / `&#10;` / `&#13;` intact
 * across a round trip.
 *
 * @param value Decoded attribute value.
 * @returns The value safe to place between double quotes.
 */
function escapeAttr(value: string): string {
  return value.replace(/[&<>"\t\n\r]/gu, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      case '"':
        return "&quot;";
      case "\t":
        return "&#9;";
      case "\n":
        return "&#10;";
      default:
        // Only "\r" remains in the character class.
        return "&#13;";
    }
  });
}
82
+
83
/**
 * Escape a string for use as XML character data: `&`, `<` and `>` are
 * replaced by their predefined entity references; every other character
 * is passed through unchanged.
 */
function escapeText(value: string): string {
  const entityFor: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
  };
  return value.replace(/[&<>]/gu, (ch) => entityFor[ch] ?? ch);
}
89
+
90
/**
 * Serialize a parsed element (and, recursively, its subtree) back to XML.
 *
 * Attributes are emitted in the iteration order of `node.attributes`, each
 * double-quoted and escaped. An element with no children is written in
 * self-closing form; otherwise text children are escaped as character data
 * and element children are serialized recursively, with nothing inserted
 * between siblings.
 *
 * @param node Parsed element to serialize.
 * @returns XML text for `node`.
 */
function serializeElementToString(node: {
  name: string;
  attributes: Record<string, string>;
  children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
}): string {
  let attributeText = "";
  for (const [name, value] of Object.entries(node.attributes)) {
    attributeText += ` ${name}="${escapeAttr(value)}"`;
  }

  if (node.children.length === 0) {
    return `<${node.name}${attributeText}/>`;
  }

  const parts: string[] = [];
  for (const child of node.children) {
    if (child.type === "text") {
      parts.push(escapeText(child.text));
    } else {
      // Nested children are typed `unknown[]` on the parameter; they are
      // assumed to have the same shape as the top-level node.
      parts.push(
        serializeElementToString(
          child as Parameters<typeof serializeElementToString>[0],
        ),
      );
    }
  }
  return `<${node.name}${attributeText}>${parts.join("")}</${node.name}>`;
}
115
+
116
/**
 * Supplied by a per-container parser to tell the grab-bag helper which
 * child elements it already maps onto modelled fields. Any child that is
 * not named here ends up as a grab-bag entry.
 */
export interface PropertyGrabBagDescriptor {
  /**
   * Local names of the children the container parser handles itself. A
   * child whose `localName` is in this set is skipped by the helper.
   */
  modelledChildNames: ReadonlySet<string>;
  /**
   * Reserved for a follow-up slice: for each modelled child, the set of
   * attributes the parser consumes. The Slice 1 helper does not read this
   * field; Slice 2 is intended to use it to emit attribute-level grab
   * entries on modelled children.
   *
   * Note: table containers (tblPr/trPr/tcPr — O2 Slice 3) currently go
   * through a parallel raw-XML mechanism in
   * `src/io/export/table-properties-xml.ts` (`mergePropertiesXml`), which
   * stores the whole container XML as a string on
   * `TableNode.propertiesXml` / `TableRowNode.propertiesXml` /
   * `TableCellNode.propertiesXml`. That path cannot take part in the
   * attribute-level slice until it is retrofitted to emit
   * `UnknownPropertyChild[]` through this descriptor. Tracked as a Lane 3
   * Tier-2 backlog entry — see
   * `docs/plans/lane-3-layout-engine-ooxml-fidelity.md`.
   */
  modelledChildAttributes: ReadonlyMap<string, ReadonlySet<string>>;
}
144
+
145
/**
 * A single grab-bag entry: one unmodelled direct child of a property
 * container, kept as XML text so the serializer can write it back inside
 * the same container on export.
 */
export interface UnknownPropertyChild {
  /**
   * Qualified element name (e.g. `"w:kinsoku"`, `"w15:collapsed"`). Used
   * for diagnostics, and intended for the future attribute-level diff so
   * the emitter can re-open the matching element.
   */
  elementName: string;
  /**
   * XML text of the child element, re-emitted as-is.
   */
  rawXml: string;
}
163
+
164
/**
 * Collect the unmodelled direct children of a property container.
 *
 * Every child whose `localName` is absent from
 * `descriptor.modelledChildNames` becomes one entry, in input order. The
 * entry's `elementName` is read from the opening tag of `rawXml` when that
 * can be parsed, otherwise it falls back to the child's `localName`.
 * Attributes and grandchildren are not inspected, and
 * `descriptor.modelledChildAttributes` is not read.
 *
 * @param children   Direct children of the container, in source order.
 * @param descriptor Names the container parser already models.
 * @returns The captured entries, or `undefined` when there are none so the
 *          canonical model stays sparse.
 */
export function capturePropertyGrabBag(
  children: readonly GrabBagSourceChild[],
  descriptor: PropertyGrabBagDescriptor,
): UnknownPropertyChild[] | undefined {
  const captured = children
    .filter((child) => !descriptor.modelledChildNames.has(child.localName))
    .map(
      (child): UnknownPropertyChild => ({
        elementName:
          extractQualifiedNameFromRawXml(child.rawXml) ?? child.localName,
        rawXml: child.rawXml,
      }),
    );
  return captured.length === 0 ? undefined : captured;
}
188
+
189
/**
 * Render a grab-bag list for insertion into a property container.
 *
 * The entries' `rawXml` strings are concatenated in list order with no
 * separator and no further escaping, so the output is exactly as faithful
 * to the source as the `rawXml` stored on each entry.
 *
 * @param entries Captured entries, or `undefined` when nothing was captured.
 * @returns The concatenated XML, or `""` for a missing or empty list.
 */
export function emitPropertyGrabBag(
  entries: readonly UnknownPropertyChild[] | undefined,
): string {
  if (!entries) {
    return "";
  }
  const fragments = Array.from(entries, (entry) => entry.rawXml);
  return fragments.join("");
}
201
+
202
/**
 * Read the qualified element name out of an opening tag, e.g.
 * `<w15:collapsed w:val="1"/>` yields `"w15:collapsed"`.
 *
 * The string must start with `<` followed by at least one character that
 * is not whitespace, `/` or `>`; otherwise `undefined` is returned so the
 * caller can fall back to its own name.
 */
function extractQualifiedNameFromRawXml(rawXml: string): string | undefined {
  const opening = /^<([^\s/>]+)/u.exec(rawXml);
  if (opening === null) {
    return undefined;
  }
  return opening[1];
}
@@ -10,7 +10,7 @@
10
10
  * parse-styles.ts, parse-numbering.ts.
11
11
  */
12
12
 
13
- import type { XmlElementNode } from "./xml-element.ts";
13
+ import type { XmlElementNode, XmlNode } from "./xml-element.ts";
14
14
 
15
15
  export function localName(name: string): string {
16
16
  const sep = name.indexOf(":");
@@ -26,6 +26,27 @@ export function findChildOptional(
26
26
  );
27
27
  }
28
28
 
29
/**
 * Find the first descendant element whose local name (namespace prefix
 * ignored) equals `local`, visiting descendants depth-first in document
 * order. `node` itself is never a candidate.
 *
 * Handy when the exact parent chain is not known — for example locating a
 * `<c:chart>` somewhere beneath `<w:drawing>` regardless of the DrawingML
 * wrappers in between.
 *
 * @param node  Element whose subtree is searched.
 * @param local Local element name to look for.
 * @returns The first matching descendant, or `undefined` when none matches.
 */
export function findFirstDescendant(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  // Explicit stack instead of recursion. Children are pushed in reverse so
  // that popping yields them in document order (pre-order traversal).
  const pending: XmlElementNode[] = [];
  const enqueueChildren = (parent: XmlElementNode): void => {
    for (let i = parent.children.length - 1; i >= 0; i -= 1) {
      const child = parent.children[i];
      if (child !== undefined && child.type === "element") {
        pending.push(child);
      }
    }
  };

  enqueueChildren(node);
  for (
    let current = pending.pop();
    current !== undefined;
    current = pending.pop()
  ) {
    if (localName(current.name) === local) {
      return current;
    }
    enqueueChildren(current);
  }
  return undefined;
}
49
+
29
50
  /** ST_OnOff: missing child → undefined; present bare or w:val="1|true|on" → true; w:val="0|false|off" → false. */
30
51
  export function readOnOff(node: XmlElementNode | undefined): boolean | undefined {
31
52
  if (!node) return undefined;
@@ -46,6 +67,43 @@ export function readIntVal(node: XmlElementNode | undefined): number | undefined
46
67
  return Number.isFinite(v) ? v : undefined;
47
68
  }
48
69
 
70
/**
 * Read a node's `val` attribute (`w:val` first, then unprefixed `val`) as
 * a floating-point number via `Number.parseFloat`.
 *
 * @returns The parsed number, or `undefined` when the node or attribute is
 *          missing or the parsed value is not finite.
 */
export function readFloatVal(node: XmlElementNode | undefined): number | undefined {
  const raw = node?.attributes["w:val"] ?? node?.attributes.val;
  if (raw === undefined) {
    return undefined;
  }
  const parsed = Number.parseFloat(raw);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
78
+
79
/**
 * Return the concatenated text of the child element that
 * `findChildOptional` returns for `local`.
 *
 * @param node  Parent element; `undefined` is tolerated.
 * @param local Local name of the child to read.
 * @returns `undefined` when `node` or the child is missing; otherwise the
 *          child's text content, which is `""` when it contains no text.
 */
export function readStringChild(
  node: XmlElementNode | undefined,
  local: string,
): string | undefined {
  if (!node) {
    return undefined;
  }
  const match = findChildOptional(node, local);
  return match ? textContent(match) : undefined;
}
93
+
94
/**
 * Gather every text node beneath an element, at any depth, and join the
 * pieces in document order with no separator.
 *
 * @param node Element whose descendant text is collected.
 * @returns The concatenated text; `""` when there are no text nodes.
 */
export function textContent(node: XmlElementNode): string {
  const pieces: string[] = [];
  const collect = (nodes: XmlNode[]): void => {
    nodes.forEach((current) => {
      if (current.type === "text") {
        pieces.push(current.text);
      } else {
        collect(current.children);
      }
    });
  };
  collect(node.children);
  return pieces.join("");
}
106
+
49
107
  /** Read an arbitrary attribute from a node as an int, with namespace fallback. */
50
108
  export function readIntAttr(node: XmlElementNode, attr: string): number | undefined {
51
109
  const raw = node.attributes[attr] ?? node.attributes[attr.replace(/^w:/, "")];
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Minimal XML parser shared across OOXML parsers.
3
+ *
4
+ * Handles the subset of XML that Office documents actually emit:
5
+ * elements, attributes, text, comments (stripped), processing instructions
6
+ * (stripped), and CDATA sections. Namespace prefixes are preserved verbatim
7
+ * on element and attribute names — strip with `localName()` from
8
+ * `xml-attr-helpers.ts` when lookups must be prefix-agnostic.
9
+ *
10
+ * Not a full XML conformance parser: does not validate element nesting,
11
+ * does not track source offsets on produced nodes, does not report errors on
12
+ * malformed input beyond best-effort early termination. Sufficient for every
13
+ * OOXML part we import.
14
+ *
15
+ * Originally inlined in `parse-theme.ts`; extracted here so the chart parsers
16
+ * and any future OOXML import path can share the same implementation.
17
+ */
18
+ import type { XmlElementNode } from "./xml-element.ts";
19
+
20
+ const ROOT_TAG = "__root__";
21
+
22
/**
 * Parse an XML string into a synthetic root element whose `children` hold
 * the top-level nodes of the source. Callers typically do
 * `parseXml(xml).children.find(...)` or look up a specific top-level
 * element via `findChildOptional`.
 *
 * Behaviour of this best-effort scanner:
 * - comments and processing instructions are skipped;
 * - CDATA content becomes a text node, taken as-is (no entity decoding);
 * - other character data is entity-decoded; whitespace-only runs are kept
 *   inside an element and dropped at the top level;
 * - a closing tag closes the innermost open element without checking that
 *   the names match; a closing tag with nothing open is ignored;
 * - an unterminated tag stops parsing and whatever was built is returned.
 *
 * @param xml Source XML text.
 * @returns The synthetic root element.
 */
export function parseXml(xml: string): XmlElementNode {
  const root: XmlElementNode = {
    type: "element",
    name: ROOT_TAG,
    attributes: {},
    children: [],
  };
  // Open-element stack; the last entry is the current parent.
  const stack: XmlElementNode[] = [root];
  let cursor = 0;

  while (cursor < xml.length) {
    // Comment: skip to the terminator, or to end of input if unterminated.
    if (xml.startsWith("<!--", cursor)) {
      const end = xml.indexOf("-->", cursor);
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }

    // Processing instruction / XML declaration: skipped the same way.
    if (xml.startsWith("<?", cursor)) {
      const end = xml.indexOf("?>", cursor);
      cursor = end >= 0 ? end + 2 : xml.length;
      continue;
    }

    // CDATA: content is literal text, so it is not entity-decoded.
    if (xml.startsWith("<![CDATA[", cursor)) {
      const end = xml.indexOf("]]>", cursor);
      const textEnd = end >= 0 ? end : xml.length;
      stack[stack.length - 1]?.children.push({
        type: "text",
        text: xml.slice(cursor + 9, textEnd),
      });
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }

    // Character data up to the next tag.
    if (xml[cursor] !== "<") {
      const nextTag = xml.indexOf("<", cursor);
      const end = nextTag >= 0 ? nextTag : xml.length;
      const text = decodeXmlEntities(xml.slice(cursor, end));
      // Keep whitespace-only text only when inside a real element.
      if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
        stack[stack.length - 1]?.children.push({ type: "text", text });
      }
      cursor = end;
      continue;
    }

    // Closing tag.
    if (xml[cursor + 1] === "/") {
      const end = xml.indexOf(">", cursor);
      if (end < 0) break;
      // Never pop the synthetic root: with an empty stack every later node
      // would have no parent and be silently discarded after one stray
      // closing tag.
      if (stack.length > 1) {
        stack.pop();
      }
      cursor = end + 1;
      continue;
    }

    // Opening or self-closing tag.
    const tagEnd = xml.indexOf(">", cursor);
    if (tagEnd < 0) break;

    const tagContent = xml.slice(cursor + 1, tagEnd);
    const selfClosing = tagContent.endsWith("/");
    const normalized = selfClosing
      ? tagContent.slice(0, -1).trimEnd()
      : tagContent;

    // The tag name runs to the first whitespace; the rest is attributes.
    const spaceIndex = normalized.search(/\s/);
    const tagName =
      spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
    const attrString = spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
    const attributes = parseAttributes(attrString);

    const element: XmlElementNode = {
      type: "element",
      name: tagName,
      attributes,
      children: [],
    };

    stack[stack.length - 1]?.children.push(element);

    if (!selfClosing) {
      stack.push(element);
    }

    cursor = tagEnd + 1;
  }

  return root;
}
114
+
115
/**
 * Decode the five predefined XML entity references (`&amp;`, `&lt;`,
 * `&gt;`, `&quot;`, `&apos;`) plus decimal (`&#65;`) and hexadecimal
 * (`&#x41;`) character references.
 *
 * Decoding is a single left-to-right pass, so the output of one
 * replacement is never decoded again: `&amp;lt;` yields the literal text
 * `&lt;`, not `<`. Numeric references above U+10FFFF are left untouched
 * instead of throwing, as is any `&` that does not start a recognised
 * reference.
 *
 * @param text Raw text or attribute value as it appears in the source.
 * @returns The decoded string.
 */
export function decodeXmlEntities(text: string): string {
  return text.replace(
    /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g,
    (reference: string, named?: string, dec?: string, hex?: string): string => {
      switch (named) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return '"';
        case "apos":
          return "'";
        default:
          break;
      }
      const codePoint =
        dec !== undefined
          ? Number.parseInt(dec, 10)
          : Number.parseInt(hex ?? "", 16);
      // String.fromCodePoint throws RangeError outside 0..0x10FFFF; keep the
      // reference text as-is in that case (also covers NaN).
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
    },
  );
}
130
+
131
/**
 * Parse the attribute portion of a start tag into a name → value record.
 *
 * Recognises `name="value"` and `name='value'` pairs (whitespace allowed
 * around `=`); names keep their namespace prefix and values are
 * entity-decoded. Text that does not match that shape is skipped, and a
 * repeated name keeps the last value seen.
 *
 * @param attrString Text between the tag name and the closing `>`.
 * @returns The parsed attributes, in source order.
 */
function parseAttributes(attrString: string): Record<string, string> {
  const pattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
  const attrs: Record<string, string> = {};
  let match: RegExpExecArray | null = pattern.exec(attrString);
  while (match !== null) {
    const [, name, , doubleQuoted, singleQuoted] = match;
    if (name) {
      attrs[name] = decodeXmlEntities(doubleQuoted ?? singleQuoted ?? "");
    }
    match = pattern.exec(attrString);
  }
  return attrs;
}