@beyondwork/docx-react-component 1.0.18 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/package.json +24 -34
- package/src/api/README.md +5 -1
- package/src/api/public-types.ts +374 -4
- package/src/api/session-state.ts +58 -0
- package/src/core/commands/formatting-commands.ts +1 -0
- package/src/core/commands/image-commands.ts +147 -0
- package/src/core/commands/index.ts +5 -1
- package/src/core/commands/list-commands.ts +231 -36
- package/src/core/commands/paragraph-layout-commands.ts +339 -0
- package/src/core/commands/section-layout-commands.ts +680 -0
- package/src/core/commands/style-commands.ts +262 -0
- package/src/core/search/search-text.ts +329 -0
- package/src/core/selection/mapping.ts +41 -0
- package/src/core/state/editor-state.ts +1 -1
- package/src/index.ts +30 -0
- package/src/io/docx-session.ts +260 -39
- package/src/io/export/serialize-main-document.ts +202 -5
- package/src/io/export/serialize-numbering.ts +28 -7
- package/src/io/normalize/normalize-text.ts +63 -25
- package/src/io/ooxml/numbering-sentinels.ts +44 -0
- package/src/io/ooxml/parse-footnotes.ts +212 -20
- package/src/io/ooxml/parse-headers-footers.ts +229 -25
- package/src/io/ooxml/parse-inline-media.ts +16 -0
- package/src/io/ooxml/parse-main-document.ts +411 -6
- package/src/io/ooxml/parse-numbering.ts +7 -0
- package/src/io/ooxml/parse-settings.ts +184 -0
- package/src/io/ooxml/parse-shapes.ts +25 -0
- package/src/io/ooxml/parse-styles.ts +463 -0
- package/src/io/ooxml/parse-theme.ts +32 -0
- package/src/model/canonical-document.ts +133 -3
- package/src/model/cds-1.0.0.ts +13 -0
- package/src/model/snapshot.ts +2 -1
- package/src/runtime/document-layout.ts +332 -0
- package/src/runtime/document-navigation.ts +564 -0
- package/src/runtime/document-runtime.ts +265 -35
- package/src/runtime/document-search.ts +145 -0
- package/src/runtime/numbering-prefix.ts +47 -26
- package/src/runtime/page-layout-estimation.ts +212 -0
- package/src/runtime/read-only-diagnostics-runtime.ts +1 -0
- package/src/runtime/session-capabilities.ts +2 -0
- package/src/runtime/story-context.ts +164 -0
- package/src/runtime/story-targeting.ts +162 -0
- package/src/runtime/surface-projection.ts +239 -12
- package/src/runtime/table-schema.ts +87 -5
- package/src/runtime/view-state.ts +459 -0
- package/src/ui/WordReviewEditor.tsx +1902 -312
- package/src/ui/browser-export.ts +52 -0
- package/src/ui/headless/preserve-editor-selection.ts +5 -0
- package/src/ui/headless/selection-helpers.ts +20 -0
- package/src/ui/headless/selection-toolbar-model.ts +22 -0
- package/src/ui/headless/use-editor-keyboard.ts +6 -1
- package/src/ui-tailwind/chrome/tw-page-ruler.tsx +386 -0
- package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +125 -14
- package/src/ui-tailwind/editor-surface/perf-probe.ts +107 -0
- package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +45 -6
- package/src/ui-tailwind/editor-surface/pm-contextual-ui.ts +31 -0
- package/src/ui-tailwind/editor-surface/pm-position-map.ts +2 -2
- package/src/ui-tailwind/editor-surface/pm-schema.ts +47 -5
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +95 -22
- package/src/ui-tailwind/editor-surface/search-plugin.ts +19 -68
- package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +11 -0
- package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +394 -77
- package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +0 -1
- package/src/ui-tailwind/index.ts +2 -1
- package/src/ui-tailwind/review/tw-comment-sidebar.tsx +277 -147
- package/src/ui-tailwind/review/tw-review-rail.tsx +6 -6
- package/src/ui-tailwind/theme/editor-theme.css +123 -0
- package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +4 -0
- package/src/ui-tailwind/toolbar/tw-toolbar.tsx +291 -12
- package/src/ui-tailwind/tw-review-workspace.tsx +926 -27
- package/src/validation/compatibility-engine.ts +92 -20
- package/src/validation/diagnostics.ts +1 -0
- package/src/validation/docx-comment-proof.ts +487 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import type { DocumentSettings } from "../../model/canonical-document.ts";
|
|
2
|
+
|
|
3
|
+
/** An XML element as produced by the inline parser in this module. */
interface XmlElementNode {
  type: "element";
  /** Tag name exactly as written in the source, namespace prefix included. */
  name: string;
  /** Attribute values keyed by the attribute name exactly as written. */
  attributes: Record<string, string>;
  /** Child nodes in document order. */
  children: Array<XmlNode>;
}

/** A run of character data found between tags. */
interface XmlTextNode {
  type: "text";
  text: string;
}

/** Any node the inline parser can emit. */
type XmlNode = XmlTextNode | XmlElementNode;
|
|
16
|
+
|
|
17
|
+
/**
 * Extracts the document settings this module understands from a settings.xml
 * string.
 *
 * Blank input, or input whose parse root has no `settings` child, yields an
 * empty object. `evenAndOddHeaders` is included only when its element is
 * present (read with `true` as the default for a missing value attribute);
 * the zoom fields are whatever `readZoomLevel` derives from the `zoom` element.
 */
export function parseSettingsXml(xml: string): DocumentSettings {
  if (xml.trim().length === 0) {
    return {};
  }

  const settingsElement = findChildElementOptional(parseXml(xml), "settings");
  if (settingsElement === undefined) {
    return {};
  }

  const headersElement = findChildElementOptional(settingsElement, "evenAndOddHeaders");
  const zoomElement = findChildElementOptional(settingsElement, "zoom");

  const headerSettings =
    headersElement === undefined
      ? {}
      : { evenAndOddHeaders: readOnOffValue(headersElement, true) };
  const zoomSettings = zoomElement === undefined ? {} : readZoomLevel(zoomElement);

  return { ...headerSettings, ...zoomSettings };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Returns the first direct child element of `node` whose name, ignoring any
 * namespace prefix, equals `childLocalName`; `undefined` when there is none.
 * Text children are skipped.
 */
function findChildElementOptional(
  node: XmlElementNode,
  childLocalName: string,
): XmlElementNode | undefined {
  for (const candidate of node.children) {
    if (candidate.type !== "element") {
      continue;
    }
    if (localName(candidate.name) === childLocalName) {
      return candidate;
    }
  }
  return undefined;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Strips the namespace prefix from a qualified name: everything up to and
 * including the first ":" is removed. Names without ":" are returned as-is.
 */
function localName(name: string): string {
  const colonAt = name.indexOf(":");
  if (colonAt < 0) {
    return name;
  }
  return name.slice(colonAt + 1);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Interprets an on/off element as a boolean.
 *
 * The value is taken from `w:val`, falling back to an unprefixed `val`.
 * When neither attribute is present, `defaultValue` is returned. The values
 * "0", "false" and "off" mean false; any other value means true.
 *
 * @param element - Element carrying the on/off value attribute.
 * @param defaultValue - Result when the element has no value attribute.
 */
function readOnOffValue(
  element: XmlElementNode,
  defaultValue: boolean,
): boolean {
  const value = element.attributes["w:val"] ?? element.attributes.val;
  if (value === undefined) {
    return defaultValue;
  }
  // "off" is a legal on/off spelling in transitional documents and must not
  // be treated as enabled.
  return value !== "0" && value !== "false" && value !== "off";
}
|
|
66
|
+
|
|
67
|
+
/**
 * Derives the `zoomLevel` setting from a zoom element.
 *
 * A `w:val`/`val` of "bestFit" maps to "pageWidth" and "fullPage" maps to
 * "onePage". Otherwise the `w:percent`/`percent` attribute is trimmed, one
 * trailing "%" is dropped, and the rest is parsed as a base-10 integer.
 * Returns an empty object when no percent is present or it does not parse.
 */
function readZoomLevel(
  element: XmlElementNode,
): Pick<DocumentSettings, "zoomLevel"> {
  const { attributes } = element;

  switch (attributes["w:val"] ?? attributes.val) {
    case "bestFit":
      return { zoomLevel: "pageWidth" };
    case "fullPage":
      return { zoomLevel: "onePage" };
    default:
      break;
  }

  const percentText = attributes["w:percent"] ?? attributes.percent;
  if (percentText === undefined || percentText === "") {
    return {};
  }

  const percent = Number.parseInt(percentText.trim().replace(/%$/u, ""), 10);
  return Number.isFinite(percent) ? { zoomLevel: percent } : {};
}
|
|
93
|
+
|
|
94
|
+
/**
 * Minimal, forgiving XML reader that builds a tree under a synthetic
 * `__root__` element.
 *
 * Comments and processing instructions are skipped. Text between tags is kept
 * (with CR/CRLF normalised to LF) unless it is whitespace-only. Closing tags
 * pop the current element without checking the name, and never pop the
 * synthetic root. Parsing stops quietly at a tag that has no ">".
 *
 * @param xml - Raw XML text.
 * @returns The synthetic root whose children are the top-level nodes.
 */
function parseXml(xml: string): XmlElementNode {
  const root: XmlElementNode = {
    type: "element",
    name: "__root__",
    attributes: {},
    children: [],
  };
  const stack: XmlElementNode[] = [root];
  let cursor = 0;

  // Appends a text node to the innermost open element, dropping blank runs.
  const pushText = (raw: string): void => {
    const normalized = raw.replace(/\r\n?/gu, "\n");
    if (!normalized.trim()) {
      return;
    }
    stack[stack.length - 1]?.children.push({
      type: "text",
      text: normalized,
    });
  };

  while (cursor < xml.length) {
    if (xml.startsWith("<!--", cursor)) {
      const end = xml.indexOf("-->", cursor);
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }

    if (xml.startsWith("<?", cursor)) {
      const end = xml.indexOf("?>", cursor);
      cursor = end >= 0 ? end + 2 : xml.length;
      continue;
    }

    const nextLt = xml.indexOf("<", cursor);
    if (nextLt < 0) {
      pushText(xml.slice(cursor));
      break;
    }

    if (nextLt > cursor) {
      pushText(xml.slice(cursor, nextLt));
      // Restart at the "<" so the comment / processing-instruction checks
      // above also apply to markup that follows text or whitespace. Without
      // this, such markup would be read as an element that is never closed.
      cursor = nextLt;
      continue;
    }

    if (xml.startsWith("</", cursor)) {
      const end = xml.indexOf(">", cursor);
      if (end < 0) break;
      if (stack.length > 1) {
        stack.pop();
      }
      cursor = end + 1;
      continue;
    }

    const end = xml.indexOf(">", cursor);
    if (end < 0) {
      break;
    }

    const rawTag = xml.slice(cursor + 1, end).trim();
    const selfClosing = rawTag.endsWith("/");
    const tagBody = selfClosing ? rawTag.slice(0, -1).trim() : rawTag;
    const spaceIndex = tagBody.search(/\s/u);
    const name = spaceIndex >= 0 ? tagBody.slice(0, spaceIndex) : tagBody;
    const attrs = spaceIndex >= 0 ? tagBody.slice(spaceIndex + 1) : "";
    const element: XmlElementNode = {
      type: "element",
      name,
      attributes: parseAttributes(attrs),
      children: [],
    };
    stack[stack.length - 1]?.children.push(element);
    if (!selfClosing) {
      stack.push(element);
    }
    cursor = end + 1;
  }

  return root;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Collects `name="value"` / `name='value'` pairs from the attribute portion
 * of a tag. Values are stored verbatim (no entity decoding); when a name
 * repeats, the last occurrence wins.
 */
function parseAttributes(raw: string): Record<string, string> {
  const result: Record<string, string> = {};
  const pattern = /([^\s=]+)\s*=\s*("([^"]*)"|'([^']*)')/gu;

  let hit: RegExpExecArray | null = pattern.exec(raw);
  while (hit !== null) {
    // Group 3 is the double-quoted value, group 4 the single-quoted one.
    result[hit[1]] = hit[3] ?? hit[4] ?? "";
    hit = pattern.exec(raw);
  }

  return result;
}
|
|
@@ -15,8 +15,12 @@ const WPS_SHAPE_GRAPHIC_URI =
|
|
|
15
15
|
|
|
16
16
|
export interface ParsedWpsShape {
|
|
17
17
|
type: "shape";
|
|
18
|
+
/** True if this shape is a text box (geometry=rect with txbx content). */
|
|
19
|
+
isTextBox?: boolean;
|
|
18
20
|
/** Extracted text content from wps:txbx for display. */
|
|
19
21
|
text?: string;
|
|
22
|
+
/** Raw txbxContent XML for structured re-rendering. */
|
|
23
|
+
txbxContentXml?: string;
|
|
20
24
|
/** DrawML geometry preset, e.g. "rect", "roundRect". */
|
|
21
25
|
geometry?: string;
|
|
22
26
|
/** Original drawing XML for lossless round-trip export. */
|
|
@@ -81,9 +85,17 @@ export function parseShapeXml(drawingXml: string): ParsedWpsShape | ParsedWordAr
|
|
|
81
85
|
};
|
|
82
86
|
}
|
|
83
87
|
|
|
88
|
+
// Text box detection: rect or no geometry with text content
|
|
89
|
+
const isTextBox = Boolean(txbxContent && (!prst || prst === "rect"));
|
|
90
|
+
|
|
91
|
+
// Extract raw txbxContent XML for structured re-rendering of text boxes
|
|
92
|
+
const txbxContentXml = txbxContent ? extractRawXml(txbxContent, drawingXml) : undefined;
|
|
93
|
+
|
|
84
94
|
return {
|
|
85
95
|
type: "shape",
|
|
96
|
+
...(isTextBox ? { isTextBox: true } : {}),
|
|
86
97
|
...(text ? { text } : {}),
|
|
98
|
+
...(txbxContentXml ? { txbxContentXml } : {}),
|
|
87
99
|
...(prst ? { geometry: prst } : {}),
|
|
88
100
|
rawXml: drawingXml,
|
|
89
101
|
};
|
|
@@ -131,6 +143,19 @@ export function parseVmlXml(pictXml: string): ParsedVmlShape | null {
|
|
|
131
143
|
};
|
|
132
144
|
}
|
|
133
145
|
|
|
146
|
+
// ---- Raw XML extraction helpers ----
|
|
147
|
+
|
|
148
|
+
/**
 * Returns the source text of the first element in `sourceXml` whose tag name
 * equals `node.name`, from its opening tag through its matching closing tag.
 *
 * Tags are matched on the exact name (so `<a` does not match `<ab>`), and
 * same-named elements nested inside are balanced so the slice ends at the
 * correct closing tag. Self-closing occurrences carry no content and are
 * ignored. Returns `undefined` when no balanced open/close pair is found.
 *
 * @param node - Parsed element whose `name` identifies the tag to locate.
 * @param sourceXml - XML text the element was parsed from.
 */
function extractRawXml(node: XmlElementNode, sourceXml: string): string | undefined {
  const escapedName = node.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // The lookahead requires the name to end at whitespace, "/" or ">".
  const tagPattern = new RegExp(`<(/?)${escapedName}(?=[\\s/>])[^>]*>`, "g");

  let depth = 0;
  let startIdx = -1;
  for (const match of sourceXml.matchAll(tagPattern)) {
    const at = match.index ?? 0;

    if (match[1] === "/") {
      if (depth === 0) {
        // Stray closing tag before any opening tag.
        continue;
      }
      depth -= 1;
      if (depth === 0) {
        return sourceXml.slice(startIdx, at + match[0].length);
      }
      continue;
    }

    if (match[0].endsWith("/>")) {
      continue;
    }

    if (depth === 0) {
      startIdx = at;
    }
    depth += 1;
  }

  return undefined;
}
|
|
158
|
+
|
|
134
159
|
// ---- Text extraction helpers ----
|
|
135
160
|
|
|
136
161
|
function extractAllText(node: XmlElementNode): string {
|
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse styles.xml into the canonical StylesCatalog.
|
|
3
|
+
*
|
|
4
|
+
* Reads paragraph, character, and table style definitions plus latent-style
|
|
5
|
+
* metadata from the package styles part. This makes styles.xml the canonical
|
|
6
|
+
* source of style truth instead of synthesizing display names from referenced
|
|
7
|
+
* styleId values.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type {
|
|
11
|
+
CharacterStyleDefinition,
|
|
12
|
+
LatentStyleDefinition,
|
|
13
|
+
ParagraphStyleDefinition,
|
|
14
|
+
StylesCatalog,
|
|
15
|
+
TableStyleDefinition,
|
|
16
|
+
} from "../../model/canonical-document.ts";
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Inline XML node types (same pattern as parse-numbering.ts)
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/** An XML element as produced by the inline parser in this module. */
interface XmlElementNode {
  type: "element";
  /** Tag name exactly as written in the source, namespace prefix included. */
  name: string;
  /** Attribute values keyed by the attribute name exactly as written. */
  attributes: Record<string, string>;
  /** Child nodes in document order. */
  children: Array<XmlNode>;
}

/** A run of character data (plain text or CDATA content). */
interface XmlTextNode {
  type: "text";
  text: string;
}

/** Any node the inline parser can emit. */
type XmlNode = XmlTextNode | XmlElementNode;
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Public interface
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/** Outcome of reading the package styles part. */
export interface ParseStylesResult {
  /** Style definitions gathered from the part. */
  catalog: StylesCatalog;
  /** True when the catalog was parsed from actual styles.xml content. */
  fromPackage: boolean;
  /** Diagnostic notes for logging/proof. */
  diagnostics: Array<string>;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Parse a styles.xml string into a canonical StylesCatalog.
 *
 * Returns `fromPackage: false` with empty catalog records when the input is
 * blank, cannot be parsed, or has no `styles` element under the parse root;
 * each case adds one explanatory note to `diagnostics`.
 *
 * Otherwise every `style` child that carries a style id is filed under
 * paragraphs, characters or tables according to its type attribute (other
 * types are ignored), `latentStyles` children are collected via
 * `readLatentStyles`, and a summary count is added to `diagnostics`.
 *
 * @param xml - Raw contents of the styles part.
 */
export function parseStylesXml(xml: string): ParseStylesResult {
  const diagnostics: string[] = [];

  // The three fallback paths share one shape; only the diagnostic differs.
  const fallback = (note: string): ParseStylesResult => {
    diagnostics.push(note);
    return {
      catalog: { paragraphs: {}, characters: {}, tables: {} },
      fromPackage: false,
      diagnostics,
    };
  };

  if (!xml || xml.trim().length === 0) {
    return fallback("styles.xml is empty; synthetic fallback will be used");
  }

  let root: XmlElementNode;
  try {
    root = parseXml(xml);
  } catch {
    // Best effort: a malformed styles part degrades to the fallback.
    return fallback("styles.xml could not be parsed; synthetic fallback will be used");
  }

  const stylesElement = findChildElementOptional(root, "styles");
  if (!stylesElement) {
    return fallback("styles.xml has no <w:styles> root; synthetic fallback will be used");
  }

  const paragraphs: Record<string, ParagraphStyleDefinition> = {};
  const characters: Record<string, CharacterStyleDefinition> = {};
  const tables: Record<string, TableStyleDefinition> = {};
  const latentStyles: Record<string, LatentStyleDefinition> = {};

  for (const child of stylesElement.children) {
    if (child.type !== "element") continue;
    const local = localName(child.name);

    if (local === "style") {
      const styleType = child.attributes["w:type"] ?? child.attributes.type;
      const styleId = child.attributes["w:styleId"] ?? child.attributes.styleId;
      if (!styleId) continue;

      const displayName = readStyleDisplayName(child) ?? styleId;
      const basedOn = readLinkedStyleId(child, "basedOn");
      // The default flag is an on/off attribute: accept every "true"
      // spelling, not just "1".
      const defaultFlag = child.attributes["w:default"] ?? child.attributes.default;
      const isDefault = defaultFlag === "1" || defaultFlag === "true" || defaultFlag === "on";

      switch (styleType) {
        case "paragraph": {
          const nextStyle = readLinkedStyleId(child, "next");
          const outlineLevel = readParagraphStyleOutlineLevel(child);
          paragraphs[styleId] = {
            styleId,
            displayName,
            kind: "paragraph",
            isDefault,
            ...(basedOn ? { basedOn } : {}),
            ...(nextStyle ? { nextStyle } : {}),
            ...(outlineLevel !== undefined ? { outlineLevel } : {}),
          };
          break;
        }
        case "character": {
          characters[styleId] = {
            styleId,
            displayName,
            kind: "character",
            isDefault,
            ...(basedOn ? { basedOn } : {}),
          };
          break;
        }
        case "table": {
          tables[styleId] = {
            styleId,
            displayName,
            kind: "table",
            isDefault,
            ...(basedOn ? { basedOn } : {}),
          };
          break;
        }
        default:
          // numbering/list styles are not part of the canonical catalog
          break;
      }
    } else if (local === "latentStyles") {
      readLatentStyles(child, latentStyles);
    }
  }

  const hasLatent = Object.keys(latentStyles).length > 0;
  diagnostics.push(
    `parsed ${Object.keys(paragraphs).length} paragraph, ` +
      `${Object.keys(characters).length} character, ` +
      `${Object.keys(tables).length} table styles` +
      (hasLatent ? `, ${Object.keys(latentStyles).length} latent styles` : ""),
  );

  return {
    catalog: {
      paragraphs,
      characters,
      tables,
      ...(hasLatent ? { latentStyles } : {}),
    },
    fromPackage: true,
    diagnostics,
  };
}
|
|
169
|
+
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// Helpers
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
/**
 * Reads a style's display name: the `w:val` (or unprefixed `val`) attribute
 * of its `name` child. `undefined` when the child or the attribute is absent.
 */
function readStyleDisplayName(styleNode: XmlElementNode): string | undefined {
  const nameAttributes = findChildElementOptional(styleNode, "name")?.attributes;
  return nameAttributes?.["w:val"] ?? nameAttributes?.val;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Reads the style id referenced by a child element such as `basedOn` or
 * `next`: the `w:val` (or unprefixed `val`) attribute of the first child
 * whose local name is `elementLocalName`. `undefined` when either is missing.
 */
function readLinkedStyleId(
  styleNode: XmlElementNode,
  elementLocalName: string,
): string | undefined {
  const link = findChildElementOptional(styleNode, elementLocalName);
  if (link === undefined) {
    return undefined;
  }
  const { attributes } = link;
  return attributes["w:val"] ?? attributes.val;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Reads the outline level declared at `pPr` > `outlineLvl` inside a style.
 *
 * The `w:val`/`val` attribute is parsed as a base-10 integer. Returns
 * `undefined` when either element or the attribute is missing, or when the
 * parsed value is not a non-negative integer.
 */
function readParagraphStyleOutlineLevel(
  styleNode: XmlElementNode,
): number | undefined {
  const pPr = findChildElementOptional(styleNode, "pPr");
  const outlineLvl = pPr ? findChildElementOptional(pPr, "outlineLvl") : undefined;
  if (!outlineLvl) {
    return undefined;
  }

  const levelText = outlineLvl.attributes["w:val"] ?? outlineLvl.attributes.val;
  if (levelText === undefined) {
    return undefined;
  }

  const level = Number.parseInt(levelText, 10);
  if (!Number.isInteger(level) || level < 0) {
    return undefined;
  }
  return level;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Copies every named `lsdException` child of `latentNode` into `out`, keyed
 * by its name attribute (a repeated name overwrites the earlier entry).
 *
 * The boolean flags and `uiPriority` are included only when the matching
 * attribute is present; `uiPriority` is additionally dropped when it does not
 * parse as a base-10 integer.
 */
function readLatentStyles(
  latentNode: XmlElementNode,
  out: Record<string, LatentStyleDefinition>,
): void {
  for (const child of latentNode.children) {
    if (child.type !== "element") continue;
    if (localName(child.name) !== "lsdException") continue;

    const name = child.attributes["w:name"] ?? child.attributes.name;
    if (!name) continue;

    const locked = readBoolAttr(child, "locked");
    const semiHidden = readBoolAttr(child, "semiHidden");
    const unhideWhenUsed = readBoolAttr(child, "unhideWhenUsed");
    const qFormat = readBoolAttr(child, "qFormat");

    const priorityText = child.attributes["w:uiPriority"] ?? child.attributes.uiPriority;
    // NaN stands for both "attribute absent" and "not a number".
    const uiPriority =
      priorityText === undefined ? Number.NaN : Number.parseInt(priorityText, 10);

    out[name] = {
      name,
      ...(locked === undefined ? {} : { locked }),
      ...(semiHidden === undefined ? {} : { semiHidden }),
      ...(unhideWhenUsed === undefined ? {} : { unhideWhenUsed }),
      ...(qFormat === undefined ? {} : { qFormat }),
      ...(Number.isNaN(uiPriority) ? {} : { uiPriority }),
    };
  }
}
|
|
234
|
+
|
|
235
|
+
/**
 * Reads an on/off attribute from `node`, looking up `w:<attrLocalName>`
 * first and the unprefixed name second.
 *
 * @returns `undefined` when the attribute is absent; otherwise `true` for
 * "1", "true" or "on" and `false` for any other value.
 */
function readBoolAttr(node: XmlElementNode, attrLocalName: string): boolean | undefined {
  const val = node.attributes[`w:${attrLocalName}`] ?? node.attributes[attrLocalName];
  if (val === undefined) return undefined;
  // "on" is a legal on/off spelling in transitional documents.
  return val === "1" || val === "true" || val === "on";
}
|
|
240
|
+
|
|
241
|
+
// ---------------------------------------------------------------------------
|
|
242
|
+
// Inline XML parser (same implementation as parse-numbering.ts)
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
/**
 * Returns the first direct child element of `node` whose name, ignoring any
 * namespace prefix, equals `childLocalName`; `undefined` when there is none.
 * Text children are skipped.
 */
function findChildElementOptional(
  node: XmlElementNode,
  childLocalName: string,
): XmlElementNode | undefined {
  for (const candidate of node.children) {
    if (candidate.type !== "element") {
      continue;
    }
    if (localName(candidate.name) === childLocalName) {
      return candidate;
    }
  }
  return undefined;
}
|
|
254
|
+
|
|
255
|
+
/**
 * Drops the namespace prefix from a qualified name by removing everything up
 * to and including the first ":". Names without ":" come back unchanged.
 */
function localName(name: string): string {
  const colonAt = name.indexOf(":");
  if (colonAt === -1) {
    return name;
  }
  return name.slice(colonAt + 1);
}
|
|
259
|
+
|
|
260
|
+
/**
 * Strict-ish XML reader that builds a tree under a synthetic `__root__`
 * element.
 *
 * Comments and processing instructions are skipped; CDATA sections become
 * text nodes verbatim; other non-empty text is entity-decoded. A closing tag
 * must match the innermost open element by local name.
 *
 * @throws Error when a closing tag has no ">", a closing tag does not match,
 * or elements remain open at end of input. Errors thrown by `findTagEnd` and
 * `parseTag` propagate.
 */
function parseXml(xml: string): XmlElementNode {
  const documentRoot: XmlElementNode = {
    type: "element",
    name: "__root__",
    attributes: {},
    children: [],
  };
  const openElements: XmlElementNode[] = [documentRoot];
  const total = xml.length;
  const trailingSlash = /\/\s*$/;
  let pos = 0;

  // Adds a node to the innermost open element, if there is one.
  const appendToCurrent = (node: XmlNode): void => {
    openElements[openElements.length - 1]?.children.push(node);
  };

  // Position just past the first `terminator` at or after `pos`, or end of
  // input when the terminator never appears.
  const positionAfter = (terminator: string): number => {
    const at = xml.indexOf(terminator, pos);
    return at < 0 ? total : at + terminator.length;
  };

  while (pos < total) {
    if (xml.startsWith("<!--", pos)) {
      pos = positionAfter("-->");
      continue;
    }

    if (xml.startsWith("<?", pos)) {
      pos = positionAfter("?>");
      continue;
    }

    if (xml.startsWith("<![CDATA[", pos)) {
      const close = xml.indexOf("]]>", pos);
      const bodyEnd = close < 0 ? total : close;
      appendToCurrent({ type: "text", text: xml.slice(pos + 9, bodyEnd) });
      pos = close < 0 ? total : close + 3;
      continue;
    }

    if (xml.charAt(pos) !== "<") {
      const lt = xml.indexOf("<", pos);
      const stop = lt < 0 ? total : lt;
      const decoded = decodeXmlEntities(xml.slice(pos, stop));
      if (decoded !== "") {
        appendToCurrent({ type: "text", text: decoded });
      }
      pos = stop;
      continue;
    }

    if (xml.charAt(pos + 1) === "/") {
      const gt = xml.indexOf(">", pos);
      if (gt < 0) {
        throw new Error("Malformed XML: missing closing >.");
      }

      const name = xml.slice(pos + 2, gt).trim();
      const closed = openElements.pop();
      if (closed === undefined || localName(closed.name) !== localName(name)) {
        throw new Error(`Malformed XML: unexpected closing tag </${name}>.`);
      }

      pos = gt + 1;
      continue;
    }

    const tagEnd = findTagEnd(xml, pos);
    const inner = xml.slice(pos + 1, tagEnd);
    const selfClosing = trailingSlash.test(inner);
    const { name, attributes } = parseTag(inner.replace(trailingSlash, "").trim());
    const opened: XmlElementNode = {
      type: "element",
      name,
      attributes,
      children: [],
    };
    appendToCurrent(opened);

    if (!selfClosing) {
      openElements.push(opened);
    }

    pos = tagEnd + 1;
  }

  if (openElements.length !== 1) {
    throw new Error("Malformed XML: unclosed element.");
  }

  return documentRoot;
}
|
|
349
|
+
|
|
350
|
+
/**
 * Splits the inside of an opening tag (without the angle brackets or a
 * trailing "/") into the tag name and its attributes.
 *
 * The name is the first whitespace-delimited token. An attribute written
 * without "=" is stored with an empty value; quoted values are passed through
 * `decodeXmlEntities`. A value missing its closing quote runs to the end of
 * the tag body.
 *
 * @throws Error when "=" is not followed by a single- or double-quoted value.
 */
function parseTag(tagBody: string): { name: string; attributes: Record<string, string> } {
  const length = tagBody.length;
  let pos = 0;

  // Moves `pos` forward while the character under it satisfies `accept`.
  const advanceWhile = (accept: (ch: string) => boolean): void => {
    while (pos < length && accept(tagBody.charAt(pos))) {
      pos += 1;
    }
  };
  const isWhitespace = (ch: string): boolean => /\s/.test(ch);
  const isNotWhitespace = (ch: string): boolean => !/\s/.test(ch);
  const isKeyChar = (ch: string): boolean => !/[\s=]/.test(ch);

  advanceWhile(isWhitespace);
  const nameStart = pos;
  advanceWhile(isNotWhitespace);
  const name = tagBody.slice(nameStart, pos);

  const attributes: Record<string, string> = {};

  while (pos < length) {
    advanceWhile(isWhitespace);
    if (pos >= length) {
      break;
    }

    const keyStart = pos;
    advanceWhile(isKeyChar);
    const key = tagBody.slice(keyStart, pos);

    advanceWhile(isWhitespace);

    if (tagBody[pos] !== "=") {
      attributes[key] = "";
      continue;
    }
    pos += 1;

    advanceWhile(isWhitespace);

    const quote = tagBody[pos];
    if (quote !== `"` && quote !== `'`) {
      throw new Error(`Malformed XML attribute ${key}.`);
    }
    pos += 1;

    const valueStart = pos;
    advanceWhile((ch) => ch !== quote);
    attributes[key] = decodeXmlEntities(tagBody.slice(valueStart, pos));
    pos += 1;
  }

  return { name, attributes };
}
|
|
409
|
+
|
|
410
|
+
/**
 * Finds the index of the ">" that ends the tag beginning at `start`,
 * ignoring any ">" that sits inside a single- or double-quoted attribute
 * value. Scanning begins at `start + 1`.
 *
 * @throws Error when the input ends before an unquoted ">" is found.
 */
function findTagEnd(xml: string, start: number): number {
  // Empty string means "not inside a quoted value".
  let openQuote = "";

  for (let index = start + 1; index < xml.length; index += 1) {
    const ch = xml[index];

    if (openQuote !== "") {
      if (ch === openQuote) {
        openQuote = "";
      }
      continue;
    }

    if (ch === `"` || ch === `'`) {
      openQuote = ch;
      continue;
    }

    if (ch === ">") {
      return index;
    }
  }

  throw new Error("Malformed XML: missing >.");
}
|
|
439
|
+
|
|
440
|
+
/**
 * Replaces the five predefined XML entities and numeric character references
 * (`&#NNN;`, `&#xHHH;`) in `value` with the characters they denote.
 *
 * Anything else that looks like an entity is left untouched, as is a numeric
 * reference above U+10FFFF, which would otherwise make
 * `String.fromCodePoint` throw.
 */
function decodeXmlEntities(value: string): string {
  return value.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (match: string, entity: string) => {
      switch (entity) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return `"`;
        case "apos":
          return "'";
        default: {
          // Only numeric references reach this branch (see the pattern).
          const codePoint = entity.startsWith("#x")
            ? Number.parseInt(entity.slice(2), 16)
            : Number.parseInt(entity.slice(1), 10);
          if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
            return match;
          }
          return String.fromCodePoint(codePoint);
        }
      }
    },
  );
}
|