@beyondwork/docx-react-component 1.0.47 → 1.0.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -11
- package/package.json +30 -41
- package/src/api/public-types.ts +199 -13
- package/src/compare/diff-engine.ts +4 -0
- package/src/core/commands/add-scope.ts +257 -0
- package/src/core/commands/formatting-commands.ts +2 -0
- package/src/core/commands/index.ts +9 -1
- package/src/core/commands/text-commands.ts +3 -1
- package/src/core/schema/text-schema.ts +95 -1
- package/src/core/selection/anchor-conversion.ts +112 -0
- package/src/core/selection/review-anchors.ts +108 -3
- package/src/core/state/text-transaction.ts +103 -7
- package/src/internal/harness-debug-ports.ts +168 -0
- package/src/io/chart-preview-resolver.ts +59 -1
- package/src/io/docx-session.ts +226 -38
- package/src/io/export/serialize-main-document.ts +46 -0
- package/src/io/export/serialize-paragraph-formatting.ts +8 -0
- package/src/io/export/serialize-run-formatting.ts +10 -1
- package/src/io/export/serialize-settings.ts +421 -0
- package/src/io/export/serialize-styles.ts +10 -0
- package/src/io/normalize/normalize-text.ts +1 -0
- package/src/io/ooxml/chart/chart-style-table.ts +543 -0
- package/src/io/ooxml/chart/color-palette.ts +101 -0
- package/src/io/ooxml/chart/compose-series-color.ts +147 -0
- package/src/io/ooxml/chart/parse-axis.ts +277 -0
- package/src/io/ooxml/chart/parse-chart-space.ts +885 -0
- package/src/io/ooxml/chart/parse-series.ts +635 -0
- package/src/io/ooxml/chart/resolve-color.ts +261 -0
- package/src/io/ooxml/chart/types.ts +439 -0
- package/src/io/ooxml/parse-block-structure.ts +99 -0
- package/src/io/ooxml/parse-complex-content.ts +90 -2
- package/src/io/ooxml/parse-main-document.ts +156 -1
- package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
- package/src/io/ooxml/parse-run-formatting.ts +49 -0
- package/src/io/ooxml/parse-scope-markers.ts +184 -0
- package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
- package/src/io/ooxml/parse-settings.ts +97 -1
- package/src/io/ooxml/parse-styles.ts +65 -0
- package/src/io/ooxml/parse-theme.ts +2 -127
- package/src/io/ooxml/property-grab-bag.ts +211 -0
- package/src/io/ooxml/xml-attr-helpers.ts +59 -1
- package/src/io/ooxml/xml-parser.ts +142 -0
- package/src/model/canonical-document.ts +160 -0
- package/src/model/scope-markers.ts +144 -0
- package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
- package/src/runtime/collab/checkpoint-election.ts +75 -0
- package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
- package/src/runtime/collab/checkpoint-store.ts +115 -0
- package/src/runtime/collab/event-types.ts +27 -0
- package/src/runtime/collab/index.ts +29 -0
- package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
- package/src/runtime/collab/runtime-collab-sync.ts +330 -0
- package/src/runtime/collab/workflow-shared.ts +247 -0
- package/src/runtime/document-locations.ts +1 -9
- package/src/runtime/document-outline.ts +1 -9
- package/src/runtime/document-runtime.ts +288 -65
- package/src/runtime/editor-surface/capabilities.ts +63 -50
- package/src/runtime/hyperlink-color-resolver.ts +119 -0
- package/src/runtime/layout/layout-engine-version.ts +8 -1
- package/src/runtime/prerender/cache-envelope.ts +19 -7
- package/src/runtime/prerender/cache-key.ts +25 -14
- package/src/runtime/prerender/canonical-document-hash.ts +63 -0
- package/src/runtime/prerender/customxml-cache.ts +211 -0
- package/src/runtime/prerender/customxml-probe.ts +78 -0
- package/src/runtime/prerender/prerender-document.ts +74 -7
- package/src/runtime/scope-resolver.ts +148 -0
- package/src/runtime/scope-tag-registry.ts +10 -0
- package/src/runtime/surface-projection.ts +102 -37
- package/src/runtime/theme-color-resolver.ts +188 -0
- package/src/runtime/workflow-markup.ts +7 -18
- package/src/ui/WordReviewEditor.tsx +48 -2
- package/src/ui/editor-runtime-boundary.ts +42 -1
- package/src/ui/headless/selection-helpers.ts +10 -23
- package/src/ui/runtime-shortcut-dispatch.ts +12 -7
- package/src/ui/unsupported-previews-policy.ts +23 -0
- package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
- package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
- package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
- package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
CanonicalDocument,
|
|
3
|
+
DocumentRootNode,
|
|
4
|
+
InlineNode,
|
|
5
|
+
} from "../../model/canonical-document.ts";
|
|
6
|
+
|
|
7
|
+
/**
 * Reserved OOXML bookmark-name prefix used to discriminate S1 scope markers
 * from user-authored bookmarks.
 *
 * - On export, each scope marker emits as
 *   `<w:bookmarkStart w:name="bw:scope:<scopeId>"/>` / `<w:bookmarkEnd/>`.
 * - On import, any bookmark whose name starts with this prefix is extracted
 *   as a `scope_marker_*` inline node pair and removed from the regular
 *   bookmark list so user-facing bookmark APIs stay clean.
 */
export const SCOPE_MARKER_BOOKMARK_PREFIX = "bw:scope:";
|
|
16
|
+
|
|
17
|
+
/** Descriptor for one boundary (start or end) of a scope-marker bookmark pair. */
export interface ScopeMarkerBookmark {
  /** Serialized bookmark id (shared between start + end in the OOXML pair). */
  bookmarkId: string;
  /** `bw:scope:<scopeId>` — the scope id with the reserved prefix constant applied. */
  name: string;
  /** Which side of the pair this descriptor represents. */
  boundary: "start" | "end";
  /** Scope id carried by the marker node (the bookmark name without the prefix). */
  scopeId: string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Walk a canonical document in pre-order and return one bookmark descriptor
 * for every scope-marker boundary found, in visit order.
 *
 * Id allocation:
 * - each `scope_marker_start` receives a fresh sequential id ("0", "1", …)
 *   which is remembered under its `scopeId`;
 * - a `scope_marker_end` reuses the id remembered for its `scopeId`; when no
 *   start was seen for that `scopeId` it receives a fresh id of its own.
 *
 * Both the start and the end descriptor carry the prefixed
 * `bw:scope:<scopeId>` name, so callers can weave them straight into the
 * `<w:bookmarkStart>` / `<w:bookmarkEnd>` emit path.
 *
 * @param document Canonical document, or any object exposing its `content`
 *   root. An object without a `content` key is treated as the root itself.
 * @returns Descriptors for every marker boundary, in document order.
 */
export function serializeScopeMarkersToBookmarks(
  document: CanonicalDocument | Pick<CanonicalDocument, "content">,
): ScopeMarkerBookmark[] {
  const root = ("content" in document
    ? (document.content as DocumentRootNode)
    : (document as unknown as DocumentRootNode));
  const out: ScopeMarkerBookmark[] = [];
  const scopeIdToBookmarkId = new Map<string, string>();
  let bookmarkIdCounter = 0;
  // Ids are plain decimal strings handed out in visit order.
  const allocateBookmarkId = (): string => String(bookmarkIdCounter++);

  walkInlineNodes(root, (node) => {
    if (node.type === "scope_marker_start") {
      const bookmarkId = allocateBookmarkId();
      scopeIdToBookmarkId.set(node.scopeId, bookmarkId);
      out.push({
        bookmarkId,
        name: `${SCOPE_MARKER_BOOKMARK_PREFIX}${node.scopeId}`,
        boundary: "start",
        scopeId: node.scopeId,
      });
    } else if (node.type === "scope_marker_end") {
      // Orphan end markers (no recorded start) still get an id so the
      // descriptor is well-formed.
      const bookmarkId = scopeIdToBookmarkId.get(node.scopeId) ?? allocateBookmarkId();
      out.push({
        bookmarkId,
        name: `${SCOPE_MARKER_BOOKMARK_PREFIX}${node.scopeId}`,
        boundary: "end",
        scopeId: node.scopeId,
      });
    }
  });

  return out;
}
|
|
67
|
+
|
|
68
|
+
/** A scope-marker start/end pair recovered from a raw bookmark list. */
export interface ParsedScopeMarkerPair {
  /** Bookmark name with the reserved scope prefix removed. */
  scopeId: string;
  /** Bookmark id shared by the paired start and end. */
  bookmarkId: string;
  /** `index` of the start bookmark. */
  startIndex: number;
  /** `index` of the end bookmark. */
  endIndex: number;
}
|
|
74
|
+
|
|
75
|
+
/** One bookmark boundary as supplied to the scope-marker splitter. */
export interface RawBookmark {
  /** Whether this entry is a bookmark start or a bookmark end. */
  readonly type: "bookmark_start" | "bookmark_end";
  /** Id used to pair a start with its end. */
  readonly bookmarkId: string;
  /** Bookmark name; optional (treated as empty when absent on a start). */
  readonly name?: string;
  /** Caller-supplied position of this boundary; copied through unchanged. */
  readonly index: number;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Split an OOXML bookmark list into (a) scope-marker pairs extracted via the
 * `bw:scope:` prefix convention and (b) the remaining user bookmarks.
 *
 * Pairing is by `bookmarkId`: a start whose name carries the prefix is held
 * open until an end with the same id arrives, at which point a pair is
 * emitted. Entries are processed in input order.
 *
 * Notes on edge cases, as implemented:
 * - an end whose id matches no open scope start is kept in
 *   `remainingBookmarks`;
 * - a prefixed start that never meets a matching end appears in neither
 *   output list;
 * - a second prefixed start with an id that is still open replaces the first.
 *
 * @param rawBookmarks Bookmark boundaries in document order.
 * @returns The extracted pairs and the untouched non-scope bookmarks.
 */
export function parseScopeMarkersFromBookmarks(
  rawBookmarks: readonly RawBookmark[],
): { scopeMarkers: ParsedScopeMarkerPair[]; remainingBookmarks: RawBookmark[] } {
  const scopeStartsById = new Map<
    string,
    { scopeId: string; startIndex: number }
  >();
  const scopeMarkers: ParsedScopeMarkerPair[] = [];
  const remainingBookmarks: RawBookmark[] = [];

  for (const bm of rawBookmarks) {
    if (bm.type === "bookmark_start") {
      const name = bm.name ?? "";
      if (!name.startsWith(SCOPE_MARKER_BOOKMARK_PREFIX)) {
        // Ordinary user bookmark: pass through untouched.
        remainingBookmarks.push(bm);
        continue;
      }
      scopeStartsById.set(bm.bookmarkId, {
        scopeId: name.slice(SCOPE_MARKER_BOOKMARK_PREFIX.length),
        startIndex: bm.index,
      });
      continue;
    }

    // bookmark_end: it belongs to a scope only if its id is currently open.
    const open = scopeStartsById.get(bm.bookmarkId);
    if (!open) {
      remainingBookmarks.push(bm);
      continue;
    }
    scopeMarkers.push({
      scopeId: open.scopeId,
      bookmarkId: bm.bookmarkId,
      startIndex: open.startIndex,
      endIndex: bm.index,
    });
    scopeStartsById.delete(bm.bookmarkId);
  }

  return { scopeMarkers, remainingBookmarks };
}
|
|
130
|
+
|
|
131
|
+
/**
 * Depth-first, pre-order walk that calls `visit` for every inline leaf node
 * reachable from `node`.
 *
 * A node whose `type` is one of the known inline leaf types is visited and
 * NOT descended into. Any other object is treated as a container: its
 * `children` array (if any) is walked, then `rows` for a `table` node or
 * `cells` for a `table_row` node. Non-object / falsy inputs are ignored.
 *
 * @param node Root, container, or inline node to start from.
 * @param visit Callback invoked once per inline leaf, in document order.
 */
function walkInlineNodes(
  node: DocumentRootNode | InlineNode | { children?: unknown; rows?: unknown; cells?: unknown; type?: string },
  visit: (inline: InlineNode) => void,
): void {
  if (!node || typeof node !== "object") return;
  const nt = (node as { type?: string }).type;

  // Inline leaf node: visit it and stop.
  switch (nt) {
    case "text":
    case "tab":
    case "hard_break":
    case "column_break":
    case "symbol":
    case "image":
    case "bookmark_start":
    case "bookmark_end":
    case "scope_marker_start":
    case "scope_marker_end":
    case "opaque_inline":
    case "footnote_ref":
    case "chart_preview":
    case "smartart_preview":
    case "shape":
    case "wordart":
    case "vml_shape":
      visit(node as InlineNode);
      return;
    default:
      break;
  }

  // Recurse into a child list only when it really is an array.
  const descend = (list: unknown): void => {
    if (!Array.isArray(list)) return;
    for (const item of list) {
      walkInlineNodes(item as InlineNode, visit);
    }
  };

  descend((node as { children?: unknown }).children);

  // Tables keep their structure under `rows` / `cells` rather than `children`.
  if (nt === "table") {
    descend((node as { rows?: unknown }).rows);
  } else if (nt === "table_row") {
    descend((node as { cells?: unknown }).cells);
  }
}
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
/**
 * Decompose a `settings.xml` document into a verbatim-preserving blueprint
 * so the export-side serializer can perform graft mode: replace modelled
 * top-level children with re-emitted XML while leaving every unmodelled
 * child (`<w:defaultTabStop>`, `<w:characterSpacingControl>`,
 * `<w:documentProtection>`, mail-merge state, etc.) byte-identical to the
 * source.
 *
 * This is intentionally a separate, narrower scanner from the canonical
 * parser at `parse-settings.ts`. The canonical parser throws away raw text;
 * this scanner keeps every byte. The two run independently — neither
 * affects the other — because the blueprint is a serializer-side concern.
 *
 * Authority: ECMA-376 §17.15 (settings.xml schema). Comments and the XML
 * declaration are preserved as part of the prelude / interstitial strings,
 * so a no-edit graft round-trips byte-identically to the source.
 */
|
|
18
|
+
|
|
19
|
+
/** One top-level child of `<w:settings>` captured verbatim. */
export interface SettingsBlueprintChild {
  /**
   * Local name of the top-level child (e.g. "compat", "compatSetting",
   * "themeFontLang", "defaultTabStop").
   */
  localName: string;
  /**
   * Verbatim XML for the child element including its full content (for
   * non-self-closing elements). Does NOT include any leading/trailing
   * whitespace — that lives on `interstitialBefore`.
   */
  rawXml: string;
  /**
   * Whitespace + comments that appear between the previous boundary (the
   * settings open tag for the first child, the previous child's `rawXml`
   * end for subsequent children) and the start of this child's `rawXml`.
   * The serializer must re-emit this verbatim before each child.
   */
  interstitialBefore: string;
}
|
|
39
|
+
|
|
40
|
+
/** Verbatim decomposition of a `settings.xml` document. */
export interface SettingsBlueprint {
  /**
   * Everything before the `<w:settings>` opening tag — the XML declaration
   * if present plus any leading whitespace. Empty string when neither is
   * present.
   */
  prelude: string;
  /**
   * The `<w:settings ...>` opening tag verbatim, including every xmlns
   * declaration and any other root attributes. If the source uses a
   * self-closing form (`<w:settings ... />`), this captures the full
   * self-closing tag and `settingsCloseTag` is the empty string.
   */
  settingsOpenTag: string;
  /**
   * Top-level children of `<w:settings>` in document order with their
   * verbatim raw XML and the interstitial whitespace/comments before each.
   */
  topLevelChildren: SettingsBlueprintChild[];
  /**
   * Whitespace + comments between the last child's `rawXml` end and the
   * `</w:settings>` closing tag. The serializer must re-emit this verbatim
   * after the last child.
   */
  trailingWhitespace: string;
  /**
   * The `</w:settings>` closing tag verbatim. Empty string when the source
   * used a self-closing `<w:settings/>` form.
   */
  settingsCloseTag: string;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Scan a settings.xml document and decompose it into a verbatim-preserving
 * blueprint.
 *
 * The result covers the text from the start of `xml` through the settings
 * closing tag (or through the self-closing root tag). Any text that follows
 * the closing tag is not captured in the returned blueprint.
 *
 * @param xml Full text of the settings part.
 * @returns The prelude, root open tag, top-level children, trailing
 *   interstitial text and closing tag.
 * @throws Error if no `<w:settings>` root is found, if the root open tag is
 *   unterminated, if the closing tag is missing, or if a child element is
 *   malformed (propagated from the child scanner).
 */
export function parseSettingsBlueprint(xml: string): SettingsBlueprint {
  const settingsTagStart = findSettingsOpenTagStart(xml);
  if (settingsTagStart < 0) {
    throw new Error("parseSettingsBlueprint: no <w:settings> element found");
  }

  const prelude = xml.slice(0, settingsTagStart);
  const settingsTagEnd = findTagEnd(xml, settingsTagStart);
  if (settingsTagEnd < 0) {
    throw new Error("parseSettingsBlueprint: unterminated <w:settings> tag");
  }
  const settingsOpenTag = xml.slice(settingsTagStart, settingsTagEnd + 1);

  // Self-closing root: there is no body and no closing tag to capture.
  if (settingsOpenTag.endsWith("/>")) {
    return {
      prelude,
      settingsOpenTag,
      topLevelChildren: [],
      trailingWhitespace: "",
      settingsCloseTag: "",
    };
  }

  // Walk children inside <w:settings>...</w:settings>.
  const innerStart = settingsTagEnd + 1;
  const closeTagInfo = findSettingsCloseTag(xml, innerStart);
  if (!closeTagInfo) {
    throw new Error("parseSettingsBlueprint: missing </w:settings> closing tag");
  }

  const { children, trailing } = scanTopLevelChildren(xml, innerStart, closeTagInfo.start);

  return {
    prelude,
    settingsOpenTag,
    topLevelChildren: children,
    trailingWhitespace: trailing,
    settingsCloseTag: closeTagInfo.tag,
  };
}
|
|
119
|
+
|
|
120
|
+
/**
 * Locate the `<` that begins the settings root element.
 *
 * Processing instructions (`<?…?>`, including the XML declaration) and
 * comments (`<!--…-->`) that precede the root are skipped. The first other
 * `<` must start `<w:settings` or `<settings`, followed by whitespace, `>`
 * or `/` (so longer names such as `<w:settingsExtra` are rejected).
 *
 * @param xml Full text of the settings part.
 * @returns Index of the root's `<`, or -1 when the first real element is not
 *   a settings element, or when a leading PI/comment is unterminated.
 */
function findSettingsOpenTagStart(xml: string): number {
  let cursor = 0;
  while (cursor < xml.length) {
    const lt = xml.indexOf("<", cursor);
    if (lt < 0) return -1;

    if (xml.startsWith("<?", lt)) {
      const end = xml.indexOf("?>", lt);
      if (end < 0) return -1;
      cursor = end + 2;
      continue;
    }
    if (xml.startsWith("<!--", lt)) {
      const end = xml.indexOf("-->", lt);
      if (end < 0) return -1;
      cursor = end + 3;
      continue;
    }

    // First real element. Confirm it's <w:settings (or fallback `settings`).
    let nameLength = -1;
    if (xml.startsWith("<w:settings", lt)) {
      nameLength = "<w:settings".length;
    } else if (xml.startsWith("<settings", lt)) {
      nameLength = "<settings".length;
    }
    if (nameLength < 0) return -1;

    // The character after the name must terminate it.
    const ch = xml[lt + nameLength];
    const terminatesName =
      ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === ">" || ch === "/";
    return terminatesName ? lt : -1;
  }
  return -1;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Find the index of the `>` that terminates the tag starting at `tagStart`.
 *
 * XML permits a literal `>` inside a quoted attribute value (only `<` and
 * `&` must be escaped there), so the scan tracks single- and double-quoted
 * values and ignores any `>` that appears inside one.
 *
 * @param xml Source text.
 * @param tagStart Index of the tag's opening `<`.
 * @returns Index of the terminating `>`, or -1 if the tag (or a quoted
 *   attribute value inside it) is never closed.
 */
function findTagEnd(xml: string, tagStart: number): number {
  // The quote character that opened the current attribute value, if any.
  let openQuote: string | null = null;
  for (let i = tagStart; i < xml.length; i++) {
    const ch = xml[i];
    if (openQuote !== null) {
      // Inside an attribute value: only the matching quote ends it.
      if (ch === openQuote) openQuote = null;
      continue;
    }
    if (ch === '"' || ch === "'") {
      openQuote = ch;
    } else if (ch === ">") {
      return i;
    }
  }
  return -1;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Find the settings closing tag at or after `startFrom`.
 *
 * Both `</w:settings>` and the unprefixed `</settings>` are searched for and
 * whichever occurs FIRST is returned. A settings element cannot legally
 * contain a nested settings element, so in a well-formed part the first
 * occurrence is the root's closing tag.
 *
 * @param xml Source text.
 * @param startFrom Index at which to begin searching.
 * @returns The tag's start index and its exact text, or `null` if neither
 *   form is present.
 */
function findSettingsCloseTag(
  xml: string,
  startFrom: number,
): { start: number; tag: string } | null {
  const candidates: Array<"</w:settings>" | "</settings>"> = [
    "</w:settings>",
    "</settings>",
  ];
  let best: { start: number; tag: string } | null = null;
  for (const candidate of candidates) {
    const idx = xml.indexOf(candidate, startFrom);
    if (idx >= 0 && (best === null || idx < best.start)) {
      best = { start: idx, tag: candidate };
    }
  }
  return best;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Split the body of the settings element (`xml[innerStart, innerEnd)`) into
 * its top-level child elements, keeping the raw text of each child and of
 * the whitespace / comments / processing instructions that precede it.
 *
 * Scanning stops early, returning the remainder as `trailing`, when only
 * interstitial text is left or when bare (non-markup) text is encountered.
 *
 * @param xml Source text.
 * @param innerStart Index just past the root open tag.
 * @param innerEnd Index of the root closing tag (exclusive bound).
 * @returns The children in document order plus the text left after the
 *   last child.
 * @throws Error when a child's open tag is unterminated within the body or
 *   its matching closing tag cannot be found.
 */
function scanTopLevelChildren(
  xml: string,
  innerStart: number,
  innerEnd: number,
): { children: SettingsBlueprintChild[]; trailing: string } {
  const children: SettingsBlueprintChild[] = [];
  let cursor = innerStart;

  while (cursor < innerEnd) {
    // Capture interstitial: whitespace + comments + processing instructions
    // until the next element start.
    const interstitialStart = cursor;
    cursor = skipInterstitial(xml, cursor, innerEnd);
    const interstitial = xml.slice(interstitialStart, cursor);

    if (cursor >= innerEnd) {
      // Pure trailing whitespace — no more children.
      return { children, trailing: interstitial };
    }

    if (xml[cursor] !== "<") {
      // Bare text content at the top level isn't legal in settings.xml;
      // surface it via interstitial-as-trailing and stop.
      return { children, trailing: interstitial + xml.slice(cursor, innerEnd) };
    }

    // Begin scanning an element.
    const elementStart = cursor;
    const tagEnd = findTagEnd(xml, elementStart);
    if (tagEnd < 0 || tagEnd >= innerEnd) {
      throw new Error(
        "parseSettingsBlueprint: unterminated tag inside <w:settings>",
      );
    }
    const openTag = xml.slice(elementStart, tagEnd + 1);
    const localName = readLocalNameFromOpenTag(openTag);

    let elementEnd: number;
    if (openTag.endsWith("/>")) {
      // Self-closing: the element ends with its own tag.
      elementEnd = tagEnd + 1;
    } else {
      // Find matching closing tag, accounting for nested same-named elements.
      const qualifiedName = getQualifiedName(openTag);
      const matchEnd = findMatchingClose(xml, tagEnd + 1, innerEnd, qualifiedName);
      if (matchEnd < 0) {
        throw new Error(
          `parseSettingsBlueprint: missing closing ${`</${qualifiedName}>`}`,
        );
      }
      elementEnd = matchEnd;
    }

    children.push({
      localName,
      rawXml: xml.slice(elementStart, elementEnd),
      interstitialBefore: interstitial,
    });
    cursor = elementEnd;
  }

  return { children, trailing: "" };
}
|
|
255
|
+
|
|
256
|
+
/**
 * Advance past whitespace, comments and processing instructions.
 *
 * @param xml Source text.
 * @param from Index to start at.
 * @param end Exclusive upper bound; nothing at or beyond it is consumed.
 * @returns Index of the first character that is not interstitial, or `end`.
 *   When a comment or PI is unterminated, or would extend past `end`, the
 *   index of its opening `<` is returned so the caller sees it as-is.
 */
function skipInterstitial(xml: string, from: number, end: number): number {
  let cursor = from;
  while (cursor < end) {
    if (xml.startsWith("<!--", cursor)) {
      const stop = xml.indexOf("-->", cursor);
      if (stop < 0 || stop + 3 > end) return cursor;
      cursor = stop + 3;
      continue;
    }
    if (xml.startsWith("<?", cursor)) {
      const stop = xml.indexOf("?>", cursor);
      if (stop < 0 || stop + 2 > end) return cursor;
      cursor = stop + 2;
      continue;
    }
    const ch = xml[cursor];
    const isWhitespace = ch === " " || ch === "\t" || ch === "\n" || ch === "\r";
    if (!isWhitespace) break;
    cursor += 1;
  }
  return cursor;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Extract the local (prefix-less) element name from an open tag.
 *
 * @param tag Complete open tag text, e.g. `<w:foo attr="bar"/>`, `<foo>` or
 *   `<w:foo>`.
 * @returns The name after the first `:` of the qualified name, or the whole
 *   qualified name when it has no prefix.
 */
function readLocalNameFromOpenTag(tag: string): string {
  // Drop the leading '<' and the trailing '>' or '/>'.
  const inside = tag.slice(1, tag.endsWith("/>") ? -2 : -1).trim();
  // The qualified name runs up to the first whitespace (start of attributes).
  const space = inside.search(/\s/u);
  const qualified = space < 0 ? inside : inside.slice(0, space);
  const colon = qualified.indexOf(":");
  return colon < 0 ? qualified : qualified.slice(colon + 1);
}
|
|
289
|
+
|
|
290
|
+
/**
 * Extract the qualified (prefix-included) element name from an open tag.
 *
 * @param tag Complete open tag text, e.g. `<w:foo attr="bar"/>` or `<foo>`.
 * @returns The text between `<` and the first whitespace (or the end of the
 *   tag when it has no attributes), e.g. `w:foo`.
 */
function getQualifiedName(tag: string): string {
  // Drop the leading '<' and the trailing '>' or '/>'.
  const inside = tag.slice(1, tag.endsWith("/>") ? -2 : -1).trim();
  const space = inside.search(/\s/u);
  return space < 0 ? inside : inside.slice(0, space);
}
|
|
295
|
+
|
|
296
|
+
/**
 * Find the end of the closing tag that matches an already-opened element.
 *
 * Walks forward from `from`, tracking nesting depth for elements with the
 * same qualified name so nested same-name elements don't terminate early.
 * Comments and processing instructions are skipped so markup-like text
 * inside them is not counted.
 *
 * @param xml Source text.
 * @param from Index just past the element's open tag.
 * @param end Exclusive bound for the scan.
 * @param qualifiedName Qualified name of the element, e.g. `w:compat`.
 * @returns Index just past the matching `</qualifiedName>`, or -1 when no
 *   match is found before `end` or when a comment, PI or nested open tag is
 *   unterminated within the bound.
 */
function findMatchingClose(
  xml: string,
  from: number,
  end: number,
  qualifiedName: string,
): number {
  const openPattern = `<${qualifiedName}`;
  const closePattern = `</${qualifiedName}>`;
  let cursor = from;
  // The caller has already consumed one open tag.
  let depth = 1;

  while (cursor < end) {
    // Skip comments + PIs so a '<' inside a comment doesn't count.
    if (xml.startsWith("<!--", cursor)) {
      const stop = xml.indexOf("-->", cursor);
      if (stop < 0 || stop + 3 > end) return -1;
      cursor = stop + 3;
      continue;
    }
    if (xml.startsWith("<?", cursor)) {
      const stop = xml.indexOf("?>", cursor);
      if (stop < 0 || stop + 2 > end) return -1;
      cursor = stop + 2;
      continue;
    }

    if (xml.startsWith(closePattern, cursor)) {
      depth -= 1;
      if (depth === 0) {
        return cursor + closePattern.length;
      }
      cursor += closePattern.length;
      continue;
    }

    if (xml.startsWith(openPattern, cursor)) {
      // Verify the next char makes this a real same-name open tag (not e.g.
      // <w:compatSetting when looking for <w:compat).
      const ch = xml[cursor + openPattern.length];
      if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === ">" || ch === "/") {
        // Find tag end to know whether it's self-closing.
        const tagEnd = findTagEnd(xml, cursor);
        if (tagEnd < 0 || tagEnd >= end) return -1;
        if (xml[tagEnd - 1] !== "/") {
          depth += 1;
        }
        cursor = tagEnd + 1;
        continue;
      }
    }

    cursor += 1;
  }
  return -1;
}
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {
|
|
2
|
+
CompatSetting,
|
|
3
|
+
DocumentSettings,
|
|
4
|
+
} from "../../model/canonical-document.ts";
|
|
2
5
|
|
|
3
6
|
interface XmlElementNode {
|
|
4
7
|
type: "element";
|
|
@@ -27,6 +30,11 @@ export function parseSettingsXml(xml: string): DocumentSettings {
|
|
|
27
30
|
|
|
28
31
|
const evenAndOddHeaders = findChildElementOptional(settingsElement, "evenAndOddHeaders");
|
|
29
32
|
const zoom = findChildElementOptional(settingsElement, "zoom");
|
|
33
|
+
const compat = findChildElementOptional(settingsElement, "compat");
|
|
34
|
+
const compatPartition = compat ? partitionCompat(compat) : undefined;
|
|
35
|
+
const rootCompatFlags = readRootCompatFlags(settingsElement);
|
|
36
|
+
const themeFontLangElement = findChildElementOptional(settingsElement, "themeFontLang");
|
|
37
|
+
const unmodelled = readUnmodelledSettingsChildren(settingsElement);
|
|
30
38
|
|
|
31
39
|
return {
|
|
32
40
|
...(evenAndOddHeaders
|
|
@@ -35,9 +43,97 @@ export function parseSettingsXml(xml: string): DocumentSettings {
|
|
|
35
43
|
}
|
|
36
44
|
: {}),
|
|
37
45
|
...(zoom ? readZoomLevel(zoom) : {}),
|
|
46
|
+
...(compatPartition && compatPartition.compatSettings.length > 0
|
|
47
|
+
? { compatSettings: compatPartition.compatSettings }
|
|
48
|
+
: {}),
|
|
49
|
+
...(compatPartition && Object.keys(compatPartition.compatFlags).length > 0
|
|
50
|
+
? { compatFlags: compatPartition.compatFlags }
|
|
51
|
+
: {}),
|
|
52
|
+
...(Object.keys(rootCompatFlags).length > 0 ? { rootCompatFlags } : {}),
|
|
53
|
+
...(themeFontLangElement
|
|
54
|
+
? { themeFontLang: { ...themeFontLangElement.attributes } }
|
|
55
|
+
: {}),
|
|
56
|
+
...(unmodelled.length > 0 ? { unmodelledSettingsChildren: unmodelled } : {}),
|
|
38
57
|
};
|
|
39
58
|
}
|
|
40
59
|
|
|
60
|
+
/**
 * Modelled top-level <w:settings> child local names. Anything not in this
 * set (and not in ROOT_COMPAT_FLAG_NAMES) ends up in
 * `unmodelledSettingsChildren` for the Phase 2 serializer to validate.
 */
const MODELLED_SETTINGS_CHILD_NAMES: ReadonlySet<string> = new Set<string>([
  "evenAndOddHeaders",
  "zoom",
  "compat",
  "themeFontLang",
]);
|
|
71
|
+
|
|
72
|
+
/**
 * Collect the local names of top-level settings children that are neither
 * modelled (`MODELLED_SETTINGS_CHILD_NAMES`) nor root compat flags
 * (`ROOT_COMPAT_FLAG_NAMES`).
 *
 * Names are returned in document order; a name that occurs several times in
 * the source is listed once per occurrence. Non-element children are
 * ignored.
 *
 * @param settingsElement Parsed `<w:settings>` element.
 * @returns Local names of the unmodelled element children.
 */
function readUnmodelledSettingsChildren(
  settingsElement: XmlElementNode,
): string[] {
  const names: string[] = [];
  for (const child of settingsElement.children) {
    if (child.type !== "element") continue;
    const local = localName(child.name);
    const isHandledElsewhere =
      MODELLED_SETTINGS_CHILD_NAMES.has(local) || ROOT_COMPAT_FLAG_NAMES.has(local);
    if (!isHandledElsewhere) {
      names.push(local);
    }
  }
  return names;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Settings-level compat-adjacent flag elements (NOT inside <w:compat>) that
 * the strict OpenXML SDK validator flags. Kept as a small allow-list; extend
 * as the corpus reveals more.
 *
 * Exported because the export-side graft serializer
 * (`src/io/export/serialize-settings.ts`) needs the same allow-list to know
 * which top-level source children are "modelled" and therefore subject to
 * canonical replacement.
 */
export const ROOT_COMPAT_FLAG_NAMES: ReadonlySet<string> = new Set<string>([
  "doNotEmbedSmartTags",
]);
|
|
99
|
+
|
|
100
|
+
/**
 * Read the settings-level compat flags listed in `ROOT_COMPAT_FLAG_NAMES`
 * from the direct children of the settings element.
 *
 * If the same flag element occurs more than once, the last occurrence wins.
 *
 * @param settingsElement Parsed `<w:settings>` element.
 * @returns Map from flag local name to its on/off value; empty when none of
 *   the allow-listed flags are present.
 */
function readRootCompatFlags(
  settingsElement: XmlElementNode,
): Record<string, boolean> {
  const flags: Record<string, boolean> = {};
  for (const child of settingsElement.children) {
    if (child.type !== "element") continue;
    const local = localName(child.name);
    if (!ROOT_COMPAT_FLAG_NAMES.has(local)) continue;
    // NOTE(review): the `true` argument is presumably the value used when the
    // element carries no explicit on/off attribute — confirm against
    // readOnOffValue.
    flags[local] = readOnOffValue(child, true);
  }
  return flags;
}
|
|
112
|
+
|
|
113
|
+
/** Contents of a `<w:compat>` element split by child kind. */
interface CompatPartition {
  /** One entry per `compatSetting` child, in document order. */
  compatSettings: CompatSetting[];
  /** On/off value of every other element child, keyed by local name. */
  compatFlags: Record<string, boolean>;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Split the element children of `<w:compat>` into `compatSetting` entries
 * and boolean compat flags.
 *
 * - `compatSetting` children become `{ name, uri, value }`, read from the
 *   `w:`-prefixed attribute first, then the unprefixed one, then `""`.
 * - Every other element child is recorded as a flag under its local name;
 *   a repeated flag keeps the last occurrence.
 *
 * @param compatElement Parsed `<w:compat>` element.
 * @returns The collected settings (document order) and flags.
 */
function partitionCompat(compatElement: XmlElementNode): CompatPartition {
  const compatSettings: CompatSetting[] = [];
  const compatFlags: Record<string, boolean> = {};

  for (const child of compatElement.children) {
    if (child.type !== "element") continue;
    const local = localName(child.name);

    if (local !== "compatSetting") {
      // NOTE(review): the `true` argument is presumably the value used when
      // the element carries no explicit on/off attribute — confirm against
      // readOnOffValue.
      compatFlags[local] = readOnOffValue(child, true);
      continue;
    }

    const attrs = child.attributes;
    compatSettings.push({
      name: attrs["w:name"] ?? attrs.name ?? "",
      uri: attrs["w:uri"] ?? attrs.uri ?? "",
      value: attrs["w:val"] ?? attrs.val ?? "",
    });
  }

  return { compatSettings, compatFlags };
}
|
|
136
|
+
|
|
41
137
|
function findChildElementOptional(
|
|
42
138
|
node: XmlElementNode,
|
|
43
139
|
childLocalName: string,
|