@beyondwork/docx-react-component 1.0.47 → 1.0.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +16 -11
  2. package/package.json +30 -41
  3. package/src/api/public-types.ts +199 -13
  4. package/src/compare/diff-engine.ts +4 -0
  5. package/src/core/commands/add-scope.ts +257 -0
  6. package/src/core/commands/formatting-commands.ts +2 -0
  7. package/src/core/commands/index.ts +9 -1
  8. package/src/core/commands/text-commands.ts +3 -1
  9. package/src/core/schema/text-schema.ts +95 -1
  10. package/src/core/selection/anchor-conversion.ts +112 -0
  11. package/src/core/selection/review-anchors.ts +108 -3
  12. package/src/core/state/text-transaction.ts +103 -7
  13. package/src/internal/harness-debug-ports.ts +168 -0
  14. package/src/io/chart-preview-resolver.ts +59 -1
  15. package/src/io/docx-session.ts +226 -38
  16. package/src/io/export/serialize-main-document.ts +46 -0
  17. package/src/io/export/serialize-paragraph-formatting.ts +8 -0
  18. package/src/io/export/serialize-run-formatting.ts +10 -1
  19. package/src/io/export/serialize-settings.ts +421 -0
  20. package/src/io/export/serialize-styles.ts +10 -0
  21. package/src/io/normalize/normalize-text.ts +1 -0
  22. package/src/io/ooxml/chart/chart-style-table.ts +543 -0
  23. package/src/io/ooxml/chart/color-palette.ts +101 -0
  24. package/src/io/ooxml/chart/compose-series-color.ts +147 -0
  25. package/src/io/ooxml/chart/parse-axis.ts +277 -0
  26. package/src/io/ooxml/chart/parse-chart-space.ts +885 -0
  27. package/src/io/ooxml/chart/parse-series.ts +635 -0
  28. package/src/io/ooxml/chart/resolve-color.ts +261 -0
  29. package/src/io/ooxml/chart/types.ts +439 -0
  30. package/src/io/ooxml/parse-block-structure.ts +99 -0
  31. package/src/io/ooxml/parse-complex-content.ts +90 -2
  32. package/src/io/ooxml/parse-main-document.ts +156 -1
  33. package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
  34. package/src/io/ooxml/parse-run-formatting.ts +49 -0
  35. package/src/io/ooxml/parse-scope-markers.ts +184 -0
  36. package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
  37. package/src/io/ooxml/parse-settings.ts +97 -1
  38. package/src/io/ooxml/parse-styles.ts +65 -0
  39. package/src/io/ooxml/parse-theme.ts +2 -127
  40. package/src/io/ooxml/property-grab-bag.ts +211 -0
  41. package/src/io/ooxml/xml-attr-helpers.ts +59 -1
  42. package/src/io/ooxml/xml-parser.ts +142 -0
  43. package/src/model/canonical-document.ts +160 -0
  44. package/src/model/scope-markers.ts +144 -0
  45. package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
  46. package/src/runtime/collab/checkpoint-election.ts +75 -0
  47. package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
  48. package/src/runtime/collab/checkpoint-store.ts +115 -0
  49. package/src/runtime/collab/event-types.ts +27 -0
  50. package/src/runtime/collab/index.ts +29 -0
  51. package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
  52. package/src/runtime/collab/runtime-collab-sync.ts +330 -0
  53. package/src/runtime/collab/workflow-shared.ts +247 -0
  54. package/src/runtime/document-locations.ts +1 -9
  55. package/src/runtime/document-outline.ts +1 -9
  56. package/src/runtime/document-runtime.ts +288 -65
  57. package/src/runtime/editor-surface/capabilities.ts +63 -50
  58. package/src/runtime/hyperlink-color-resolver.ts +119 -0
  59. package/src/runtime/layout/layout-engine-version.ts +8 -1
  60. package/src/runtime/prerender/cache-envelope.ts +19 -7
  61. package/src/runtime/prerender/cache-key.ts +25 -14
  62. package/src/runtime/prerender/canonical-document-hash.ts +63 -0
  63. package/src/runtime/prerender/customxml-cache.ts +211 -0
  64. package/src/runtime/prerender/customxml-probe.ts +78 -0
  65. package/src/runtime/prerender/prerender-document.ts +74 -7
  66. package/src/runtime/scope-resolver.ts +148 -0
  67. package/src/runtime/scope-tag-registry.ts +10 -0
  68. package/src/runtime/surface-projection.ts +102 -37
  69. package/src/runtime/theme-color-resolver.ts +188 -0
  70. package/src/runtime/workflow-markup.ts +7 -18
  71. package/src/ui/WordReviewEditor.tsx +48 -2
  72. package/src/ui/editor-runtime-boundary.ts +42 -1
  73. package/src/ui/headless/selection-helpers.ts +10 -23
  74. package/src/ui/runtime-shortcut-dispatch.ts +12 -7
  75. package/src/ui/unsupported-previews-policy.ts +23 -0
  76. package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
  77. package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
  78. package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
  79. package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
  80. package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
@@ -0,0 +1,184 @@
1
+ import type {
2
+ CanonicalDocument,
3
+ DocumentRootNode,
4
+ InlineNode,
5
+ } from "../../model/canonical-document.ts";
6
+
7
/**
 * Bookmark-name prefix reserved for S1 scope markers in OOXML.
 *
 * Export writes each scope marker as a bookmark named
 * `bw:scope:<scopeId>`. Import treats any bookmark whose name begins with
 * this prefix as a `scope_marker_*` inline node pair instead of a
 * user-authored bookmark, so the regular bookmark list (and the user-facing
 * bookmark APIs built on it) never contains scope markers.
 */
export const SCOPE_MARKER_BOOKMARK_PREFIX = "bw:scope:";
16
+
17
/**
 * One half (start or end) of the OOXML bookmark pair that represents a
 * scope marker on export.
 */
export interface ScopeMarkerBookmark {
  /** Serialized bookmark id; the start and end of one pair carry the same value. */
  bookmarkId: string;
  /** Full bookmark name: the reserved scope prefix followed by `scopeId`. */
  name: string;
  /** Which side of the pair this descriptor stands for. */
  boundary: "start" | "end";
  /** Scope identifier copied from the scope-marker node. */
  scopeId: string;
}
25
+
26
/**
 * Collect bookmark descriptors for every scope marker in a canonical
 * document, in the order the inline walker visits them.
 *
 * Each `scope_marker_start` gets a freshly allocated numeric id (counting up
 * from 0, serialized as a string). The matching `scope_marker_end` reuses the
 * id recorded for the same `scopeId`. An end marker whose `scopeId` has not
 * been seen yet receives a fresh id of its own. Both start and end
 * descriptors carry the prefixed bookmark name.
 *
 * @param document Canonical document, or any object exposing its `content`.
 * @returns One descriptor per scope-marker node, in visit order.
 */
export function serializeScopeMarkersToBookmarks(
  document: CanonicalDocument | Pick<CanonicalDocument, "content">,
): ScopeMarkerBookmark[] {
  const root =
    "content" in document
      ? (document.content as DocumentRootNode)
      : (document as unknown as DocumentRootNode);

  const bookmarks: ScopeMarkerBookmark[] = [];
  const idByScope = new Map<string, string>();
  let nextId = 0;

  // Hands out sequential ids; only invoked when a new id is really needed.
  const allocateId = (): string => {
    const id = String(nextId);
    nextId += 1;
    return id;
  };

  walkInlineNodes(root, (node) => {
    if (node.type !== "scope_marker_start" && node.type !== "scope_marker_end") {
      return;
    }

    const isStart = node.type === "scope_marker_start";
    let bookmarkId: string;
    if (isStart) {
      bookmarkId = allocateId();
      idByScope.set(node.scopeId, bookmarkId);
    } else {
      // Reuse the start's id when known; otherwise mint one for the orphan end.
      bookmarkId = idByScope.get(node.scopeId) ?? allocateId();
    }

    bookmarks.push({
      bookmarkId,
      name: `${SCOPE_MARKER_BOOKMARK_PREFIX}${node.scopeId}`,
      boundary: isStart ? "start" : "end",
      scopeId: node.scopeId,
    });
  });

  return bookmarks;
}
67
+
68
/** A scope marker recovered from a prefixed bookmark start/end pair on import. */
export interface ParsedScopeMarkerPair {
  /** Bookmark name with the reserved scope prefix stripped. */
  scopeId: string;
  /** Bookmark id shared by the start and end that formed this pair. */
  bookmarkId: string;
  /** `index` value of the bookmark start. */
  startIndex: number;
  /** `index` value of the bookmark end. */
  endIndex: number;
}
74
+
75
/** A bookmark boundary as read from OOXML, before scope markers are split out. */
export interface RawBookmark {
  /** Whether this entry is the opening or closing half of a bookmark. */
  readonly type: "bookmark_start" | "bookmark_end";
  /** Id that ties a start to its end. */
  readonly bookmarkId: string;
  /** Bookmark name; optional, and treated as empty when absent on a start. */
  readonly name?: string;
  /** Position value supplied by the caller; copied into the parsed pair's start/end index. */
  readonly index: number;
}
81
+
82
/**
 * Partition a list of raw bookmarks into scope-marker pairs and ordinary
 * user bookmarks.
 *
 * A bookmark start whose name begins with the reserved scope prefix opens a
 * scope; the later bookmark end with the same `bookmarkId` closes it and
 * yields one `ParsedScopeMarkerPair`. Every other start or end is passed
 * through unchanged, in input order. A prefixed start that never meets a
 * matching end appears in neither output list.
 *
 * @param rawBookmarks Bookmark boundaries in document order.
 * @returns The extracted scope pairs and the bookmarks that remain.
 */
export function parseScopeMarkersFromBookmarks(
  rawBookmarks: readonly RawBookmark[],
): { scopeMarkers: ParsedScopeMarkerPair[]; remainingBookmarks: RawBookmark[] } {
  const pendingStarts = new Map<string, { scopeId: string; startIndex: number }>();
  const scopeMarkers: ParsedScopeMarkerPair[] = [];
  const remainingBookmarks: RawBookmark[] = [];

  for (const bookmark of rawBookmarks) {
    const { bookmarkId, index } = bookmark;

    if (bookmark.type === "bookmark_start") {
      const name = bookmark.name ?? "";
      if (name.startsWith(SCOPE_MARKER_BOOKMARK_PREFIX)) {
        // Remember the open scope until its end shows up.
        pendingStarts.set(bookmarkId, {
          scopeId: name.slice(SCOPE_MARKER_BOOKMARK_PREFIX.length),
          startIndex: index,
        });
      } else {
        remainingBookmarks.push(bookmark);
      }
      continue;
    }

    // Anything that is not a start is treated as an end.
    const pending = pendingStarts.get(bookmarkId);
    if (!pending) {
      remainingBookmarks.push(bookmark);
      continue;
    }

    scopeMarkers.push({
      scopeId: pending.scopeId,
      bookmarkId,
      startIndex: pending.startIndex,
      endIndex: index,
    });
    pendingStarts.delete(bookmarkId);
  }

  return { scopeMarkers, remainingBookmarks };
}
130
+
131
/**
 * Depth-first walk that calls `visit` for every inline leaf reachable from
 * `node`.
 *
 * A node whose `type` is one of the listed inline leaf kinds is handed to
 * `visit` and not descended into. Any other object has its `children` array
 * walked (when it is an array); a `table` additionally has its `rows`
 * walked, and a `table_row` its `cells`. Non-object values are ignored.
 *
 * @param node Root of the subtree to walk.
 * @param visit Callback invoked once per inline leaf, in walk order.
 */
function walkInlineNodes(
  node: DocumentRootNode | InlineNode | { children?: unknown; rows?: unknown; cells?: unknown; type?: string },
  visit: (inline: InlineNode) => void,
): void {
  if (!node || typeof node !== "object") {
    return;
  }

  const kind = (node as { type?: string }).type;

  switch (kind) {
    case "text":
    case "tab":
    case "hard_break":
    case "column_break":
    case "symbol":
    case "image":
    case "bookmark_start":
    case "bookmark_end":
    case "scope_marker_start":
    case "scope_marker_end":
    case "opaque_inline":
    case "footnote_ref":
    case "chart_preview":
    case "smartart_preview":
    case "shape":
    case "wordart":
    case "vml_shape":
      // Inline leaf: report it and stop.
      visit(node as InlineNode);
      return;
    default:
      break;
  }

  // Recurse into a container property only when it really is an array.
  const descend = (items: unknown): void => {
    if (!Array.isArray(items)) return;
    for (const item of items) {
      walkInlineNodes(item as InlineNode, visit);
    }
  };

  descend((node as { children?: unknown }).children);

  if (kind === "table") {
    descend((node as { rows?: unknown }).rows);
  } else if (kind === "table_row") {
    descend((node as { cells?: unknown }).cells);
  }
}
@@ -0,0 +1,349 @@
1
+ /**
2
+ * Decompose a `settings.xml` document into a verbatim-preserving blueprint
3
+ * so the export-side serializer can perform graft mode: replace modelled
4
+ * top-level children with re-emitted XML while leaving every unmodelled
5
+ * child (`<w:defaultTabStop>`, `<w:characterSpacingControl>`,
6
+ * `<w:documentProtection>`, mail-merge state, etc.) byte-identical to the
7
+ * source.
8
+ *
9
+ * This is intentionally a separate, narrower scanner from the canonical
10
+ * parser at `parse-settings.ts`. The canonical parser throws away raw text;
11
+ * this scanner keeps every byte. The two run independently — neither
12
+ * affects the other — because the blueprint is a serializer-side concern.
13
+ *
14
+ * Authority: ECMA-376 §17.15 (settings.xml schema). Comments and the XML
15
+ * declaration are preserved as part of the prelude / interstitial strings,
16
+ * so a no-edit graft round-trips byte-identically to the source.
17
+ */
18
+
19
/** One top-level child element of the settings root, kept verbatim. */
export interface SettingsBlueprintChild {
  /**
   * Element name with any namespace prefix removed, e.g. "compat",
   * "themeFontLang" or "defaultTabStop".
   */
  localName: string;
  /**
   * Exact source text of the element, from its opening `<` through the end
   * of its closing tag (or through `/>` for a self-closing element).
   * Surrounding whitespace is excluded; it is stored in
   * `interstitialBefore`.
   */
  rawXml: string;
  /**
   * Exact source text (whitespace, comments, processing instructions) that
   * sits between the previous boundary and this element. The previous
   * boundary is the end of the settings opening tag for the first child and
   * the end of the preceding child's `rawXml` otherwise. A serializer must
   * write this out unchanged ahead of the child.
   */
  interstitialBefore: string;
}
39
+
40
/**
 * Verbatim decomposition of a settings.xml document. Concatenating
 * `prelude`, `settingsOpenTag`, each child's `interstitialBefore` + `rawXml`,
 * `trailingWhitespace` and `settingsCloseTag` reproduces the source text up
 * to and including the root closing tag.
 */
export interface SettingsBlueprint {
  /**
   * All source text that precedes the settings opening tag: typically the
   * XML declaration and leading whitespace. Empty when the document starts
   * directly with the root element.
   */
  prelude: string;
  /**
   * The root opening tag exactly as written, with all xmlns declarations
   * and other attributes. For a self-closing root (`<w:settings ... />`)
   * this holds the whole self-closing tag and `settingsCloseTag` is "".
   */
  settingsOpenTag: string;
  /**
   * Direct children of the root in document order, each with its verbatim
   * XML and the interstitial text that precedes it.
   */
  topLevelChildren: SettingsBlueprintChild[];
  /**
   * Source text between the end of the last child and the root closing
   * tag. A serializer must write this out unchanged after the last child.
   */
  trailingWhitespace: string;
  /**
   * The root closing tag exactly as written, or "" when the root was
   * self-closing.
   */
  settingsCloseTag: string;
}
71
+
72
/**
 * Break a settings.xml document into a blueprint whose parts are exact
 * slices of the input.
 *
 * Text that follows the root closing tag is not stored in any blueprint
 * field.
 *
 * @param xml Full text of settings.xml.
 * @returns Prelude, root tags, top-level children and trailing text.
 * @throws Error when no settings root element is found, when the root
 *   opening tag is never terminated, when the closing tag is missing, or
 *   when a child element inside the root cannot be delimited.
 */
export function parseSettingsBlueprint(xml: string): SettingsBlueprint {
  const openStart = findSettingsOpenTagStart(xml);
  if (openStart < 0) {
    throw new Error("parseSettingsBlueprint: no <w:settings> element found");
  }

  const openEnd = findTagEnd(xml, openStart);
  if (openEnd < 0) {
    throw new Error("parseSettingsBlueprint: unterminated <w:settings> tag");
  }

  const bodyStart = openEnd + 1;
  const prelude = xml.slice(0, openStart);
  const settingsOpenTag = xml.slice(openStart, bodyStart);

  // A self-closing root has no body and no closing tag.
  if (settingsOpenTag.endsWith("/>")) {
    return {
      prelude,
      settingsOpenTag,
      topLevelChildren: [],
      trailingWhitespace: "",
      settingsCloseTag: "",
    };
  }

  const closeTag = findSettingsCloseTag(xml, bodyStart);
  if (!closeTag) {
    throw new Error("parseSettingsBlueprint: missing </w:settings> closing tag");
  }

  // Children live strictly between the opening tag and the closing tag.
  const scanned = scanTopLevelChildren(xml, bodyStart, closeTag.start);

  return {
    prelude,
    settingsOpenTag,
    topLevelChildren: scanned.children,
    trailingWhitespace: scanned.trailing,
    settingsCloseTag: closeTag.tag,
  };
}
119
+
120
/**
 * Locate the `<` that opens the settings root element.
 *
 * Processing instructions (including the XML declaration) and comments that
 * precede the first element are skipped. The first real element must be
 * `<w:settings` or `<settings`, followed by whitespace, `>` or `/`.
 *
 * @param xml Full document text.
 * @returns Index of the root's `<`, or -1 when the first element is not the
 *   settings root or a leading comment/PI is unterminated.
 */
function findSettingsOpenTagStart(xml: string): number {
  let pos = 0;

  while (pos < xml.length) {
    const lt = xml.indexOf("<", pos);
    if (lt < 0) {
      return -1;
    }

    if (xml.startsWith("<?", lt)) {
      const piEnd = xml.indexOf("?>", lt);
      if (piEnd < 0) return -1;
      pos = piEnd + 2;
      continue;
    }

    if (xml.startsWith("<!--", lt)) {
      const commentEnd = xml.indexOf("-->", lt);
      if (commentEnd < 0) return -1;
      pos = commentEnd + 3;
      continue;
    }

    // First real element: it is either the settings root or a mismatch.
    const rootPrefix = ["<w:settings", "<settings"].find((prefix) =>
      xml.startsWith(prefix, lt),
    );
    if (rootPrefix === undefined) {
      return -1;
    }

    // Reject longer names such as <w:settingsExtra by checking the next char.
    const boundary = xml[lt + rootPrefix.length];
    const endsName =
      boundary === " " ||
      boundary === "\t" ||
      boundary === "\n" ||
      boundary === "\r" ||
      boundary === ">" ||
      boundary === "/";
    return endsName ? lt : -1;
  }

  return -1;
}
156
+
157
/**
 * Find the `>` that terminates the tag beginning at `tagStart`.
 *
 * XML allows a literal `>` inside a quoted attribute value (only `<` and `&`
 * must be escaped there), so the scan tracks whether it is inside a
 * single- or double-quoted value and ignores any `>` found while quoted.
 *
 * @param xml Document text.
 * @param tagStart Index of the tag's opening `<`.
 * @returns Index of the terminating `>`, or -1 when the tag is never closed
 *   (including when a quote is left open).
 */
function findTagEnd(xml: string, tagStart: number): number {
  // Holds the active quote character, or "" while outside any attribute value.
  let quote = "";
  for (let i = tagStart; i < xml.length; i++) {
    const ch = xml[i];
    if (quote !== "") {
      if (ch === quote) quote = "";
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === ">") {
      return i;
    }
  }
  return -1;
}
165
+
166
/**
 * Locate the closing tag of the settings root.
 *
 * The root's end tag is the last end tag in a well-formed document, so the
 * search takes the LAST occurrence of `</w:settings>` or `</settings>`
 * (whichever sits further right). Taking the first occurrence would cut the
 * body short if a comment inside the root happened to contain the same
 * text.
 *
 * @param xml Document text.
 * @param startFrom Index just past the root opening tag; matches before it
 *   are ignored.
 * @returns Start index and exact text of the closing tag, or null when no
 *   closing tag exists at or after `startFrom`.
 */
function findSettingsCloseTag(
  xml: string,
  startFrom: number,
): { start: number; tag: string } | null {
  const candidates: Array<"</w:settings>" | "</settings>"> = [
    "</w:settings>",
    "</settings>",
  ];
  let bestIdx = -1;
  let bestTag = "";
  for (const candidate of candidates) {
    const idx = xml.lastIndexOf(candidate);
    if (idx >= startFrom && idx > bestIdx) {
      bestIdx = idx;
      bestTag = candidate;
    }
  }
  if (bestIdx < 0) return null;
  return { start: bestIdx, tag: bestTag };
}
189
+
190
/**
 * Split the body of the settings root into its direct child elements.
 *
 * Each iteration first consumes interstitial text (whitespace, comments,
 * processing instructions), then delimits one element. Scanning stops
 * early, returning the rest as `trailing`, when only interstitial text is
 * left or when bare text is encountered where an element was expected.
 *
 * @param xml Document text.
 * @param innerStart Index just past the root opening tag.
 * @param innerEnd Index of the root closing tag.
 * @returns The children found and the text left over after the last one.
 * @throws Error when a child's opening tag is not terminated before
 *   `innerEnd`, or when its matching closing tag cannot be found.
 */
function scanTopLevelChildren(
  xml: string,
  innerStart: number,
  innerEnd: number,
): { children: SettingsBlueprintChild[]; trailing: string } {
  const children: SettingsBlueprintChild[] = [];
  let pos = innerStart;

  while (pos < innerEnd) {
    const gapStart = pos;
    pos = skipInterstitial(xml, gapStart, innerEnd);
    const interstitialBefore = xml.slice(gapStart, pos);

    // Nothing but interstitial text remained.
    if (pos >= innerEnd) {
      return { children, trailing: interstitialBefore };
    }

    // Bare text is not an element; hand everything left back as trailing.
    if (xml[pos] !== "<") {
      return { children, trailing: interstitialBefore + xml.slice(pos, innerEnd) };
    }

    const openTagEnd = findTagEnd(xml, pos);
    if (openTagEnd < 0 || openTagEnd >= innerEnd) {
      throw new Error(
        "parseSettingsBlueprint: unterminated tag inside <w:settings>",
      );
    }

    const openTag = xml.slice(pos, openTagEnd + 1);
    const localName = readLocalNameFromOpenTag(openTag);

    // Self-closing elements end with their own tag; others need a matching close.
    let elementEnd = openTagEnd + 1;
    if (!openTag.endsWith("/>")) {
      const qualifiedName = getQualifiedName(openTag);
      elementEnd = findMatchingClose(xml, openTagEnd + 1, innerEnd, qualifiedName);
      if (elementEnd < 0) {
        throw new Error(
          `parseSettingsBlueprint: missing closing </${qualifiedName}>`,
        );
      }
    }

    children.push({
      localName,
      rawXml: xml.slice(pos, elementEnd),
      interstitialBefore,
    });
    pos = elementEnd;
  }

  return { children, trailing: "" };
}
255
+
256
/**
 * Advance past whitespace, comments and processing instructions.
 *
 * @param xml Document text.
 * @param from Index to start at.
 * @param end Exclusive upper bound for the scan.
 * @returns Index of the first character that is not interstitial, or `end`
 *   if everything up to it was. When a comment or processing instruction is
 *   unterminated or would run past `end`, the index of its opening `<` is
 *   returned instead.
 */
function skipInterstitial(xml: string, from: number, end: number): number {
  let pos = from;

  while (pos < end) {
    // Pick the terminator that matches the construct starting here, if any.
    let terminator = "";
    if (xml.startsWith("<!--", pos)) {
      terminator = "-->";
    } else if (xml.startsWith("<?", pos)) {
      terminator = "?>";
    }

    if (terminator !== "") {
      const at = xml.indexOf(terminator, pos);
      if (at < 0 || at + terminator.length > end) {
        return pos;
      }
      pos = at + terminator.length;
      continue;
    }

    const ch = xml[pos];
    const isWhitespace = ch === " " || ch === "\t" || ch === "\n" || ch === "\r";
    if (!isWhitespace) {
      return pos;
    }
    pos += 1;
  }

  return pos;
}
280
+
281
/**
 * Extract the element's local name from an opening or self-closing tag.
 *
 * Accepts forms such as `<w:foo attr="bar"/>`, `<foo>` and `<w:foo>`. The
 * name is the text up to the first whitespace; everything up to and
 * including the first `:` is dropped as the namespace prefix.
 *
 * @param tag Complete tag text including the angle brackets.
 * @returns The name without its prefix.
 */
function readLocalNameFromOpenTag(tag: string): string {
  const trailing = tag.endsWith("/>") ? -2 : -1;
  const body = tag.slice(1, trailing).trim();
  const qualified = body.split(/\s/u, 1)[0] ?? "";
  // indexOf yields -1 when there is no prefix, making this slice(0).
  return qualified.slice(qualified.indexOf(":") + 1);
}
289
+
290
/**
 * Extract the element name, namespace prefix included, from an opening or
 * self-closing tag.
 *
 * @param tag Complete tag text including the angle brackets.
 * @returns The text between `<` and the first whitespace (or the tag's end).
 */
function getQualifiedName(tag: string): string {
  const trailing = tag.endsWith("/>") ? -2 : -1;
  const body = tag.slice(1, trailing).trim();
  // Leading run of non-whitespace characters; always matches, possibly empty.
  const leading = /^\S*/u.exec(body);
  return leading ? leading[0] : body;
}
295
+
296
/**
 * Find where the element opened just before `from` ends, counting nested
 * elements of the same qualified name so an inner close tag does not end
 * the outer element.
 *
 * Comments and processing instructions are skipped whole, so markup-like
 * text inside them is never counted.
 *
 * @param xml Document text.
 * @param from Index just past the element's opening tag.
 * @param end Exclusive upper bound for the scan.
 * @param qualifiedName Element name including any namespace prefix.
 * @returns Index just past the matching closing tag, or -1 when no match
 *   is found before `end` or a nested construct is unterminated.
 */
function findMatchingClose(
  xml: string,
  from: number,
  end: number,
  qualifiedName: string,
): number {
  const openToken = `<${qualifiedName}`;
  const closeToken = `</${qualifiedName}>`;
  let depth = 1;
  let pos = from;

  while (pos < end) {
    if (xml.startsWith("<!--", pos)) {
      const stop = xml.indexOf("-->", pos);
      if (stop < 0 || stop + 3 > end) return -1;
      pos = stop + 3;
    } else if (xml.startsWith("<?", pos)) {
      const stop = xml.indexOf("?>", pos);
      if (stop < 0 || stop + 2 > end) return -1;
      pos = stop + 2;
    } else if (xml.startsWith(closeToken, pos)) {
      pos += closeToken.length;
      depth -= 1;
      if (depth === 0) {
        return pos;
      }
    } else {
      // Only a same-name open tag counts: the character after the name must
      // end it, so <w:compatSetting is not mistaken for <w:compat.
      const next = xml[pos + openToken.length];
      const isSameNameOpen =
        xml.startsWith(openToken, pos) &&
        (next === " " ||
          next === "\t" ||
          next === "\n" ||
          next === "\r" ||
          next === ">" ||
          next === "/");

      if (!isSameNameOpen) {
        pos += 1;
        continue;
      }

      const tagEnd = findTagEnd(xml, pos);
      if (tagEnd < 0 || tagEnd >= end) return -1;
      // A self-closing nested element does not deepen the nesting.
      if (xml[tagEnd - 1] !== "/") {
        depth += 1;
      }
      pos = tagEnd + 1;
    }
  }

  return -1;
}
@@ -1,4 +1,7 @@
1
- import type { DocumentSettings } from "../../model/canonical-document.ts";
1
+ import type {
2
+ CompatSetting,
3
+ DocumentSettings,
4
+ } from "../../model/canonical-document.ts";
2
5
 
3
6
  interface XmlElementNode {
4
7
  type: "element";
@@ -27,6 +30,11 @@ export function parseSettingsXml(xml: string): DocumentSettings {
27
30
 
28
31
  const evenAndOddHeaders = findChildElementOptional(settingsElement, "evenAndOddHeaders");
29
32
  const zoom = findChildElementOptional(settingsElement, "zoom");
33
+ const compat = findChildElementOptional(settingsElement, "compat");
34
+ const compatPartition = compat ? partitionCompat(compat) : undefined;
35
+ const rootCompatFlags = readRootCompatFlags(settingsElement);
36
+ const themeFontLangElement = findChildElementOptional(settingsElement, "themeFontLang");
37
+ const unmodelled = readUnmodelledSettingsChildren(settingsElement);
30
38
 
31
39
  return {
32
40
  ...(evenAndOddHeaders
@@ -35,9 +43,97 @@ export function parseSettingsXml(xml: string): DocumentSettings {
35
43
  }
36
44
  : {}),
37
45
  ...(zoom ? readZoomLevel(zoom) : {}),
46
+ ...(compatPartition && compatPartition.compatSettings.length > 0
47
+ ? { compatSettings: compatPartition.compatSettings }
48
+ : {}),
49
+ ...(compatPartition && Object.keys(compatPartition.compatFlags).length > 0
50
+ ? { compatFlags: compatPartition.compatFlags }
51
+ : {}),
52
+ ...(Object.keys(rootCompatFlags).length > 0 ? { rootCompatFlags } : {}),
53
+ ...(themeFontLangElement
54
+ ? { themeFontLang: { ...themeFontLangElement.attributes } }
55
+ : {}),
56
+ ...(unmodelled.length > 0 ? { unmodelledSettingsChildren: unmodelled } : {}),
38
57
  };
39
58
  }
40
59
 
60
/**
 * Local names of the top-level settings children that the parser models
 * explicitly. A child whose name is neither in this set nor in
 * ROOT_COMPAT_FLAG_NAMES is reported through `unmodelledSettingsChildren`
 * so the Phase 2 serializer can validate it.
 */
const MODELLED_SETTINGS_CHILD_NAMES = new Set<string>([
  "evenAndOddHeaders",
  "zoom",
  "compat",
  "themeFontLang",
]);
71
+
72
/**
 * List the local names of the settings root's element children that are
 * neither modelled nor recognised as root-level compat flags.
 *
 * @param settingsElement Parsed settings root element.
 * @returns Local names in document order; repeated elements are repeated.
 */
function readUnmodelledSettingsChildren(
  settingsElement: XmlElementNode,
): string[] {
  const unmodelled: string[] = [];
  for (const child of settingsElement.children) {
    if (child.type === "element") {
      const local = localName(child.name);
      const handledElsewhere =
        MODELLED_SETTINGS_CHILD_NAMES.has(local) || ROOT_COMPAT_FLAG_NAMES.has(local);
      if (!handledElsewhere) {
        unmodelled.push(local);
      }
    }
  }
  return unmodelled;
}
85
+
86
/**
 * Allow-list of compat-adjacent flag elements that sit directly under the
 * settings root (not inside `<w:compat>`) and that the strict OpenXML SDK
 * validator flags. Deliberately small; extend it as the corpus reveals
 * more names.
 *
 * Exported so the export-side graft serializer
 * (`src/io/export/serialize-settings.ts`) can share the list when deciding
 * which top-level source children count as "modelled" and are therefore
 * replaced with canonical output.
 */
export const ROOT_COMPAT_FLAG_NAMES: ReadonlySet<string> = new Set<string>([
  "doNotEmbedSmartTags",
]);
99
+
100
/**
 * Read the allow-listed compat flag elements found directly under the
 * settings root.
 *
 * @param settingsElement Parsed settings root element.
 * @returns Map from flag local name to its on/off value. If a flag element
 *   is repeated, the last occurrence wins.
 */
function readRootCompatFlags(
  settingsElement: XmlElementNode,
): Record<string, boolean> {
  const flags: Record<string, boolean> = {};
  for (const child of settingsElement.children) {
    if (child.type === "element") {
      const local = localName(child.name);
      if (ROOT_COMPAT_FLAG_NAMES.has(local)) {
        // NOTE(review): the `true` argument is presumably the value used
        // when the element has no explicit on/off attribute; confirm
        // against readOnOffValue.
        flags[local] = readOnOffValue(child, true);
      }
    }
  }
  return flags;
}
112
+
113
/** Children of `<w:compat>` split into named compat settings and on/off flags. */
interface CompatPartition {
  /** One entry per `compatSetting` child, in document order. */
  compatSettings: CompatSetting[];
  /** Every other element child, keyed by local name. */
  compatFlags: Record<string, boolean>;
}
117
+
118
/**
 * Split the element children of `<w:compat>` into `compatSetting` entries
 * and on/off flags.
 *
 * A `compatSetting` child contributes its name, uri and value, read from the
 * `w:`-prefixed attribute first, then the unprefixed one, then "". Any other
 * element child becomes a flag keyed by its local name.
 *
 * @param compatElement Parsed `<w:compat>` element.
 * @returns The collected settings and flags.
 */
function partitionCompat(compatElement: XmlElementNode): CompatPartition {
  const partition: CompatPartition = { compatSettings: [], compatFlags: {} };

  for (const child of compatElement.children) {
    if (child.type !== "element") continue;

    const local = localName(child.name);
    if (local !== "compatSetting") {
      // NOTE(review): the `true` argument is presumably the value used
      // when the element has no explicit on/off attribute; confirm
      // against readOnOffValue.
      partition.compatFlags[local] = readOnOffValue(child, true);
      continue;
    }

    const attrs = child.attributes;
    partition.compatSettings.push({
      name: attrs["w:name"] ?? attrs.name ?? "",
      uri: attrs["w:uri"] ?? attrs.uri ?? "",
      value: attrs["w:val"] ?? attrs.val ?? "",
    });
  }

  return partition;
}
136
+
41
137
  function findChildElementOptional(
42
138
  node: XmlElementNode,
43
139
  childLocalName: string,