@beyondwork/docx-react-component 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -104
- package/package.json +50 -30
- package/src/README.md +85 -0
- package/src/api/README.md +22 -0
- package/src/api/public-types.ts +525 -0
- package/src/compare/diff-engine.ts +530 -0
- package/src/compare/export-redlines.ts +162 -0
- package/src/compare/snapshot.ts +37 -0
- package/src/component-inventory.md +99 -0
- package/src/core/README.md +10 -0
- package/src/core/commands/README.md +3 -0
- package/src/core/commands/formatting-commands.ts +161 -0
- package/src/core/commands/image-commands.ts +144 -0
- package/src/core/commands/index.ts +1013 -0
- package/src/core/commands/list-commands.ts +370 -0
- package/src/core/commands/review-commands.ts +108 -0
- package/src/core/commands/text-commands.ts +119 -0
- package/src/core/schema/README.md +3 -0
- package/src/core/schema/text-schema.ts +512 -0
- package/src/core/selection/README.md +3 -0
- package/src/core/selection/mapping.ts +238 -0
- package/src/core/selection/review-anchors.ts +94 -0
- package/src/core/state/README.md +3 -0
- package/src/core/state/editor-state.ts +580 -0
- package/src/core/state/text-transaction.ts +276 -0
- package/src/formats/xlsx/io/parse-shared-strings.ts +41 -0
- package/src/formats/xlsx/io/parse-sheet.ts +289 -0
- package/src/formats/xlsx/io/parse-styles.ts +57 -0
- package/src/formats/xlsx/io/parse-workbook.ts +75 -0
- package/src/formats/xlsx/io/xlsx-session.ts +306 -0
- package/src/formats/xlsx/model/cell.ts +189 -0
- package/src/formats/xlsx/model/sheet.ts +244 -0
- package/src/formats/xlsx/model/styles.ts +118 -0
- package/src/formats/xlsx/model/workbook.ts +449 -0
- package/src/index.ts +45 -0
- package/src/io/README.md +10 -0
- package/src/io/docx-session.ts +1763 -0
- package/src/io/export/README.md +3 -0
- package/src/io/export/export-session.ts +165 -0
- package/src/io/export/minimal-docx.ts +115 -0
- package/src/io/export/reattach-preserved-parts.ts +54 -0
- package/src/io/export/serialize-comments.ts +876 -0
- package/src/io/export/serialize-footnotes.ts +217 -0
- package/src/io/export/serialize-headers-footers.ts +200 -0
- package/src/io/export/serialize-main-document.ts +982 -0
- package/src/io/export/serialize-numbering.ts +97 -0
- package/src/io/export/serialize-revisions.ts +389 -0
- package/src/io/export/serialize-runtime-revisions.ts +265 -0
- package/src/io/export/serialize-tables.ts +147 -0
- package/src/io/export/split-review-boundaries.ts +194 -0
- package/src/io/normalize/README.md +3 -0
- package/src/io/normalize/normalize-text.ts +437 -0
- package/src/io/ooxml/README.md +3 -0
- package/src/io/ooxml/parse-comments.ts +779 -0
- package/src/io/ooxml/parse-complex-content.ts +287 -0
- package/src/io/ooxml/parse-fields.ts +438 -0
- package/src/io/ooxml/parse-footnotes.ts +403 -0
- package/src/io/ooxml/parse-headers-footers.ts +483 -0
- package/src/io/ooxml/parse-inline-media.ts +431 -0
- package/src/io/ooxml/parse-main-document.ts +1846 -0
- package/src/io/ooxml/parse-numbering.ts +425 -0
- package/src/io/ooxml/parse-revisions.ts +658 -0
- package/src/io/ooxml/parse-shapes.ts +271 -0
- package/src/io/ooxml/parse-tables.ts +568 -0
- package/src/io/ooxml/parse-theme.ts +314 -0
- package/src/io/ooxml/part-manifest.ts +136 -0
- package/src/io/ooxml/revision-boundaries.ts +351 -0
- package/src/io/opc/README.md +3 -0
- package/src/io/opc/corrupt-package.ts +166 -0
- package/src/io/opc/docx-package.ts +74 -0
- package/src/io/opc/package-reader.ts +325 -0
- package/src/io/opc/package-writer.ts +273 -0
- package/src/legal/bookmarks.ts +196 -0
- package/src/legal/cross-references.ts +356 -0
- package/src/legal/defined-terms.ts +203 -0
- package/src/model/README.md +3 -0
- package/src/model/canonical-document.ts +1911 -0
- package/src/model/cds-1.0.0.ts +196 -0
- package/src/model/snapshot.ts +393 -0
- package/src/preservation/README.md +3 -0
- package/src/preservation/markup-compatibility.ts +48 -0
- package/src/preservation/opaque-fragment-store.ts +89 -0
- package/src/preservation/opaque-region.ts +233 -0
- package/src/preservation/package-preservation.ts +120 -0
- package/src/preservation/preserved-part-manifest.ts +56 -0
- package/src/preservation/relationship-retention.ts +57 -0
- package/src/preservation/store.ts +185 -0
- package/src/review/README.md +16 -0
- package/src/review/store/README.md +3 -0
- package/src/review/store/comment-anchors.ts +70 -0
- package/src/review/store/comment-remapping.ts +154 -0
- package/src/review/store/comment-store.ts +331 -0
- package/src/review/store/comment-thread.ts +109 -0
- package/src/review/store/revision-actions.ts +394 -0
- package/src/review/store/revision-store.ts +303 -0
- package/src/review/store/revision-types.ts +168 -0
- package/src/review/store/runtime-comment-store.ts +43 -0
- package/src/runtime/README.md +3 -0
- package/src/runtime/ai-action-policy.ts +764 -0
- package/src/runtime/document-runtime.ts +967 -0
- package/src/runtime/read-only-diagnostics-runtime.ts +232 -0
- package/src/runtime/review-runtime.ts +44 -0
- package/src/runtime/revision-runtime.ts +107 -0
- package/src/runtime/session-capabilities.ts +138 -0
- package/src/runtime/surface-projection.ts +570 -0
- package/src/runtime/table-commands.ts +87 -0
- package/src/runtime/table-schema.ts +140 -0
- package/src/runtime/virtualized-rendering.ts +258 -0
- package/src/ui/README.md +30 -0
- package/src/ui/WordReviewEditor.tsx +1506 -0
- package/src/ui/comments/README.md +3 -0
- package/src/ui/compatibility/README.md +3 -0
- package/src/ui/editor-surface/README.md +3 -0
- package/src/ui/headless/comment-decoration-model.ts +124 -0
- package/src/ui/headless/revision-decoration-model.ts +128 -0
- package/src/ui/headless/selection-helpers.ts +34 -0
- package/src/ui/headless/use-editor-keyboard.ts +98 -0
- package/src/ui/review/README.md +3 -0
- package/src/ui/shared/revision-filters.ts +31 -0
- package/src/ui/status/README.md +3 -0
- package/src/ui/theme/README.md +3 -0
- package/src/ui/toolbar/README.md +3 -0
- package/src/ui-tailwind/chrome/tw-alert-banner.tsx +48 -0
- package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +44 -0
- package/src/ui-tailwind/chrome/tw-unsaved-modal.tsx +58 -0
- package/src/ui-tailwind/chrome/use-before-unload.ts +20 -0
- package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +139 -0
- package/src/ui-tailwind/editor-surface/pm-decorations.ts +98 -0
- package/src/ui-tailwind/editor-surface/pm-position-map.ts +123 -0
- package/src/ui-tailwind/editor-surface/pm-schema.ts +452 -0
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +327 -0
- package/src/ui-tailwind/editor-surface/search-plugin.ts +157 -0
- package/src/ui-tailwind/editor-surface/tw-caret.tsx +12 -0
- package/src/ui-tailwind/editor-surface/tw-editor-surface.tsx +150 -0
- package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +118 -0
- package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +52 -0
- package/src/ui-tailwind/editor-surface/tw-paragraph-block.tsx +151 -0
- package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +215 -0
- package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +111 -0
- package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +122 -0
- package/src/ui-tailwind/index.ts +61 -0
- package/src/ui-tailwind/review/tw-comment-sidebar.tsx +276 -0
- package/src/ui-tailwind/review/tw-health-panel.tsx +120 -0
- package/src/ui-tailwind/review/tw-review-rail.tsx +120 -0
- package/src/ui-tailwind/review/tw-revision-sidebar.tsx +164 -0
- package/src/ui-tailwind/status/tw-status-bar.tsx +58 -0
- package/src/ui-tailwind/theme/editor-theme.css +190 -0
- package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +48 -0
- package/src/ui-tailwind/toolbar/tw-toolbar.tsx +231 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +140 -0
- package/src/validation/README.md +3 -0
- package/src/validation/compatibility-engine.ts +317 -0
- package/src/validation/compatibility-report.ts +160 -0
- package/src/validation/diagnostics.ts +203 -0
- package/src/validation/import-diagnostics.ts +128 -0
- package/src/validation/low-priority-word-surfaces.ts +373 -0
- package/dist/chunk-32W6IVQE.js +0 -7725
- package/dist/chunk-32W6IVQE.js.map +0 -1
- package/dist/index.cjs +0 -23722
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -7
- package/dist/index.d.ts +0 -7
- package/dist/index.js +0 -16011
- package/dist/index.js.map +0 -1
- package/dist/public-types-DqCURAz8.d.cts +0 -1152
- package/dist/public-types-DqCURAz8.d.ts +0 -1152
- package/dist/tailwind.cjs +0 -8295
- package/dist/tailwind.cjs.map +0 -1
- package/dist/tailwind.d.cts +0 -323
- package/dist/tailwind.d.ts +0 -323
- package/dist/tailwind.js +0 -553
- package/dist/tailwind.js.map +0 -1
|
@@ -0,0 +1,1846 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
TextMark,
|
|
3
|
+
ParagraphBorders,
|
|
4
|
+
ParagraphShading,
|
|
5
|
+
ParagraphSpacing,
|
|
6
|
+
ParagraphIndentation,
|
|
7
|
+
TabStop,
|
|
8
|
+
TableLook,
|
|
9
|
+
} from "../../model/canonical-document.ts";
|
|
10
|
+
import type { OpcRelationship } from "./part-manifest.ts";
|
|
11
|
+
import {
|
|
12
|
+
parseInlineMediaXml,
|
|
13
|
+
type InlineMediaPart,
|
|
14
|
+
} from "./parse-inline-media.ts";
|
|
15
|
+
import { toCanonicalNumberingInstanceId } from "./parse-numbering.ts";
|
|
16
|
+
|
|
17
|
+
/**
 * Result of parsing a main document part: one parsed block per element child
 * of the document body, in source order.
 */
export interface ParsedMainDocument {
  blocks: Array<ParsedBlockNode>;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Every block-level node the parser can produce. Elements the parser does not
 * model (or chooses not to model) are carried as `ParsedOpaqueBlockNode`.
 */
export type ParsedBlockNode =
  | ParsedParagraphNode
  | ParsedTableBlockNode
  | ParsedSdtNode
  | ParsedCustomXmlNode
  | ParsedAltChunkNode
  | ParsedOpaqueBlockNode;
|
|
28
|
+
|
|
29
|
+
/**
 * A paragraph (`p` element) whose paragraph properties the parser supports.
 *
 * Apart from `type`, `children` and `rawXml`, every field is read from the
 * paragraph's `pPr` child. The parser always writes `styleId` (possibly as
 * `undefined`). The other optional fields are only present when the parsed
 * value is truthy, with two exceptions: `outlineLevel` is present whenever it
 * is not `undefined`, and `tabStops` only when the list is non-empty.
 */
export interface ParsedParagraphNode {
  type: "paragraph";
  /** Paragraph style identifier, if the paragraph properties name one. */
  styleId?: string;
  /** List membership of the paragraph. */
  numbering?: {
    numberingInstanceId: string;
    level: number;
  };
  alignment?: "left" | "center" | "right" | "both" | "distribute";
  spacing?: ParagraphSpacing;
  indentation?: ParagraphIndentation;
  tabStops?: TabStop[];
  keepNext?: boolean;
  keepLines?: boolean;
  outlineLevel?: number;
  pageBreakBefore?: boolean;
  widowControl?: boolean;
  borders?: ParagraphBorders;
  shading?: ParagraphShading;
  bidi?: boolean;
  suppressLineNumbers?: boolean;
  cnfStyle?: string;
  /** Inline content in source order. */
  children: ParsedInlineNode[];
  /** The paragraph element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Every inline node that can appear among a paragraph's children. Inline
 * markup the parser does not model is carried as `ParsedOpaqueInlineNode`.
 */
export type ParsedInlineNode =
  | ParsedTextNode
  | ParsedBreakNode
  | ParsedColumnBreakNode
  | ParsedTabNode
  | ParsedSymbolNode
  | ParsedImageNode
  | ParsedHyperlinkNode
  | ParsedOpaqueInlineNode;
|
|
63
|
+
|
|
64
|
+
/** Inline text content together with the marks (if any) applied to it. */
export interface ParsedTextNode {
  type: "text";
  text: string;
  marks?: Array<TextMark>;
}
|
|
69
|
+
|
|
70
|
+
/** Inline marker node tagged `hard_break`; it carries no payload. */
export interface ParsedBreakNode {
  type: "hard_break";
}
|
|
73
|
+
|
|
74
|
+
/** Inline marker node tagged `column_break`; it carries no payload. */
export interface ParsedColumnBreakNode {
  type: "column_break";
}
|
|
77
|
+
|
|
78
|
+
/** Inline marker node tagged `tab`; it carries no payload. */
export interface ParsedTabNode {
  type: "tab";
}
|
|
81
|
+
|
|
82
|
+
/**
 * Inline symbol: a character, optionally tied to a specific font, with the
 * marks (if any) applied to it.
 */
export interface ParsedSymbolNode {
  type: "symbol";
  char: string;
  font?: string;
  marks?: Array<TextMark>;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Inline node describing an image reference.
 *
 * NOTE(review): the code that fills this structure is not part of the section
 * reviewed here, so units (for example of `offset`) and the exact meaning of
 * the package-related fields should be confirmed against the media parser.
 */
export interface ParsedImageNode {
  type: "image";
  mediaId: string;
  relationshipId?: string;
  packagePartName?: string;
  contentType?: string;
  filename?: string;
  altText?: string;
  placementXml?: string;
  /** Whether the image sits in the text flow or is positioned as a floating object. */
  display?: "inline" | "floating";
  /** Placement details; presumably only meaningful for floating images — TODO confirm. */
  floating?: {
    horizontalPosition?: {
      relativeFrom?: string;
      align?: string;
      offset?: number;
    };
    verticalPosition?: {
      relativeFrom?: string;
      align?: string;
      offset?: number;
    };
    wrap?: "none" | "square" | "tight" | "through" | "topAndBottom";
    behindDoc?: boolean;
    layoutInCell?: boolean;
    allowOverlap?: boolean;
  };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Inline hyperlink. Its children are limited to text, breaks, tabs and
 * symbols; images, nested hyperlinks and opaque inlines are not representable
 * inside a hyperlink.
 */
export interface ParsedHyperlinkNode {
  type: "hyperlink";
  href: string;
  children: (
    | ParsedTextNode
    | ParsedBreakNode
    | ParsedColumnBreakNode
    | ParsedTabNode
    | ParsedSymbolNode
  )[];
  rawXml: string;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Inline markup that is carried through verbatim instead of being modelled.
 * `rawXml` is the element's markup sliced from the source XML.
 */
export interface ParsedOpaqueInlineNode {
  type: "opaque_inline";
  rawXml: string;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Block-level markup that is carried through verbatim instead of being
 * modelled. `rawXml` is the element's markup sliced from the source XML.
 */
export interface ParsedOpaqueBlockNode {
  type: "opaque_block";
  rawXml: string;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Block-level structured document tag (`sdt`) that has an `sdtContent` child.
 * `children` are the parsed element children of that content element.
 */
export interface ParsedSdtNode {
  type: "sdt";
  /** Values read from the `sdtPr` child; an empty object when there is none. */
  properties: {
    /** Local name of the `sdtPr` child element the parser treats as the control's type marker. */
    sdtType?: string;
    /** `val` attribute of the `alias` child. */
    alias?: string;
    /** `val` attribute of the `tag` child. */
    tag?: string;
    /** `val` attribute of the `lock` child. */
    lock?: string;
    /** The `sdtPr` element's markup, sliced verbatim from the source XML. */
    propertiesXml?: string;
  };
  children: ParsedBlockNode[];
  /** The whole `sdt` element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Block-level `customXml` wrapper that carries a `uri` and/or `element`
 * attribute. `children` are its parsed element children, excluding
 * `customXmlPr`.
 */
export interface ParsedCustomXmlNode {
  type: "custom_xml";
  uri?: string;
  element?: string;
  children: Array<ParsedBlockNode>;
  /** The whole `customXml` element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Block-level `altChunk` element that carries a relationship id. The content
 * it refers to is not resolved here; only the id and raw markup are kept.
 */
export interface ParsedAltChunkNode {
  type: "alt_chunk";
  relationshipId: string;
  /** The `altChunk` element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
160
|
+
|
|
161
|
+
/**
 * A parsed table (`tbl` element).
 *
 * `styleId`, `tblLook` and `propertiesXml` come from the `tblPr` child and
 * are only present when truthy. `gridColumns` comes from the `tblGrid` child
 * and defaults to an empty array. `rows` holds one entry per `tr` child.
 */
export interface ParsedTableBlockNode {
  type: "table";
  styleId?: string;
  tblLook?: TableLook;
  /** The `tblPr` element's markup, sliced verbatim from the source XML. */
  propertiesXml?: string;
  gridColumns: Array<number>;
  rows: Array<ParsedTableRowNode>;
  /** The whole `tbl` element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
170
|
+
|
|
171
|
+
/** A parsed table row (`tr` element) with one entry in `cells` per `tc` child. */
export interface ParsedTableRowNode {
  type: "table_row";
  /** The `trPr` element's markup, sliced verbatim from the source XML. */
  propertiesXml?: string;
  cells: Array<ParsedTableCellNode>;
  /** The whole `tr` element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
177
|
+
|
|
178
|
+
/**
 * A parsed table cell (`tc` element).
 *
 * `propertiesXml`, `gridSpan` and `verticalMerge` come from the `tcPr` child
 * and are only present when truthy. Every other element child of the cell is
 * parsed as a block and collected in `children`.
 */
export interface ParsedTableCellNode {
  type: "table_cell";
  /** The `tcPr` element's markup, sliced verbatim from the source XML. */
  propertiesXml?: string;
  gridSpan?: number;
  verticalMerge?: "restart" | "continue";
  children: Array<ParsedBlockNode>;
  /** The whole `tc` element's markup, sliced verbatim from the source XML. */
  rawXml: string;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Element node of the lightweight XML tree this parser works on.
 *
 * `name` may carry a namespace prefix (callers reduce it with `localName`),
 * and attribute keys may be prefixed as well (for example `"w:val"`).
 * `start` and `end` are offsets into the source XML string such that
 * `sourceXml.slice(start, end)` yields the element's markup.
 */
interface XmlElementNode {
  type: "element";
  name: string;
  attributes: Record<string, string>;
  children: Array<XmlNode>;
  start: number;
  end: number;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Text node of the lightweight XML tree this parser works on.
 * `start`/`end` presumably follow the same source-offset convention as
 * element nodes — TODO confirm against the XML tokenizer.
 */
interface XmlTextNode {
  type: "text";
  text: string;
  start: number;
  end: number;
}
|
|
202
|
+
|
|
203
|
+
/** Any node of the lightweight XML tree, discriminated by its `type` field. */
type XmlNode = XmlElementNode | XmlTextNode;
|
|
204
|
+
|
|
205
|
+
/**
 * Inline nodes extracted from a run, plus a flag saying whether the run could
 * be represented with the supported node types.
 * NOTE(review): the producer of this structure is outside the section
 * reviewed here — confirm how callers react to `supported === false`.
 */
interface RunParseResult {
  nodes: (
    | ParsedTextNode
    | ParsedBreakNode
    | ParsedColumnBreakNode
    | ParsedTabNode
    | ParsedSymbolNode
  )[];
  supported: boolean;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Text marks extracted from formatting markup, plus a flag saying whether all
 * of that formatting could be represented as marks.
 * NOTE(review): the producer of this structure is outside the section
 * reviewed here — confirm how callers react to `supported === false`.
 */
interface MarksParseResult {
  marks: Array<TextMark>;
  supported: boolean;
}
|
|
214
|
+
|
|
215
|
+
/** Relationship type URI that marks a package relationship as a hyperlink target. */
const HYPERLINK_RELATIONSHIP_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
|
|
217
|
+
|
|
218
|
+
/**
 * Parses the XML of a main document part into block nodes.
 *
 * The XML is parsed, the `document` element and its `body` child are located,
 * and every element child of the body is converted with `parseBodyChild`.
 * Non-element children of the body are ignored.
 *
 * NOTE(review): what happens when `document` or `body` is missing depends on
 * `findChildElement`, which is not visible in this section.
 *
 * @param xml - Source XML of the document part.
 * @param relationships - Relationships of the part; also indexed by `id` for
 *   lookups (a later entry with the same id replaces an earlier one).
 * @param mediaParts - Media parts handed through to the block parsers.
 * @param sourcePartPath - Path of the part being parsed; defaults to
 *   `/word/document.xml`.
 * @returns The parsed blocks in source order.
 */
export function parseMainDocumentXml(
  xml: string,
  relationships: readonly OpcRelationship[] = [],
  mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
  sourcePartPath = "/word/document.xml",
): ParsedMainDocument {
  const root = parseXml(xml);
  const bodyElement = findChildElement(findChildElement(root, "document"), "body");

  const relationshipMap = new Map<string, OpcRelationship>();
  for (const relationship of relationships) {
    relationshipMap.set(relationship.id, relationship);
  }

  const blocks: ParsedBlockNode[] = [];
  for (const node of bodyElement.children) {
    if (node.type === "element") {
      blocks.push(parseBodyChild(node, xml, relationshipMap, relationships, mediaParts, sourcePartPath));
    }
  }

  return { blocks };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Converts one block-level XML element into a parsed block node.
 *
 * Dispatch is on the element's local name:
 * - `tbl`: kept as an opaque block when `tableRequiresOpaquePreservation`
 *   says so for its raw XML, or when `parseTableElement` throws; otherwise
 *   parsed as a table.
 * - `sdt`, `customXml`, `altChunk`: delegated to their dedicated parsers.
 * - `p`: parsed as a paragraph (below).
 * - anything else: returned as an opaque block carrying the raw XML.
 *
 * Paragraph children are handled as follows: `pPr` supplies the paragraph
 * properties, `r` contributes run content, `hyperlink` one hyperlink node,
 * `ins`/`del` the content of the revision container, comment range markers
 * are dropped, and every other element becomes an opaque inline. If
 * `supportsParagraphProperties` rejects the `pPr`, the whole paragraph is
 * returned as an opaque block instead.
 *
 * @param node - The element to convert.
 * @param sourceXml - The XML string that `node.start`/`node.end` index into.
 * @param relationshipMap - Relationships indexed by id.
 * @param relationships - The same relationships as a list.
 * @param mediaParts - Media parts handed through to the run parser.
 * @param sourcePartPath - Path of the part being parsed.
 */
function parseBodyChild(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedBlockNode {
  const sliceSource = (element: XmlElementNode): string => sourceXml.slice(element.start, element.end);
  const opaqueBlock = (rawXml: string): ParsedOpaqueBlockNode => ({ type: "opaque_block", rawXml });

  switch (localName(node.name)) {
    case "tbl": {
      // Tables with revision markup (tracked changes inside cells) stay opaque
      // to preserve fidelity until revision-aware table editing is implemented.
      const tableXml = sliceSource(node);
      if (tableRequiresOpaquePreservation(tableXml)) {
        return opaqueBlock(tableXml);
      }
      try {
        return parseTableElement(node, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath);
      } catch {
        // Deliberate best effort: any table parsing failure falls back to
        // opaque preservation of the original markup.
        return opaqueBlock(tableXml);
      }
    }
    case "sdt":
      return parseSdtElement(node, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath);
    case "customXml":
      return parseCustomXmlElement(node, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath);
    case "altChunk":
      return parseAltChunkElement(node, sourceXml);
    case "p":
      break;
    default:
      return opaqueBlock(sliceSource(node));
  }

  // From here on `node` is a paragraph.
  let properties: Partial<Omit<ParsedParagraphNode, "type" | "children" | "rawXml">> = {};
  let paragraphSupported = true;
  const children: ParsedInlineNode[] = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    const childName = localName(child.name);

    if (childName === "pPr") {
      // Every property is re-read, so a later `pPr` fully replaces an earlier one.
      properties = {
        styleId: readParagraphStyleId(child),
        numbering: readParagraphNumbering(child),
        alignment: readParagraphAlignment(child),
        spacing: readParagraphSpacing(child),
        indentation: readParagraphIndentation(child),
        tabStops: readParagraphTabStops(child),
        keepNext: readOnOffParagraphProperty(child, "keepNext"),
        keepLines: readOnOffParagraphProperty(child, "keepLines"),
        outlineLevel: readParagraphOutlineLevel(child),
        pageBreakBefore: readOnOffParagraphProperty(child, "pageBreakBefore"),
        widowControl: readOnOffParagraphProperty(child, "widowControl"),
        borders: readParagraphBorders(child),
        shading: readParagraphShading(child),
        bidi: readOnOffParagraphProperty(child, "bidi"),
        suppressLineNumbers: readOnOffParagraphProperty(child, "suppressLineNumbers"),
        cnfStyle: readParagraphCnfStyle(child),
      };
      paragraphSupported = paragraphSupported && supportsParagraphProperties(child);
    } else if (childName === "r") {
      children.push(...parseRun(child, sourceXml, relationships, mediaParts, sourcePartPath));
    } else if (childName === "hyperlink") {
      children.push(parseHyperlink(child, sourceXml, relationshipMap));
    } else if (childName === "ins" || childName === "del") {
      children.push(...parseRevisionContainer(child, sourceXml, relationshipMap));
    } else if (childName === "commentRangeStart" || childName === "commentRangeEnd") {
      // Comment range markers produce no inline node here.
      continue;
    } else {
      // Bookmark, permission and proofing markers (bookmarkStart, bookmarkEnd,
      // permStart, permEnd, proofErr) as well as any unrecognised element are
      // carried through verbatim.
      children.push({ type: "opaque_inline", rawXml: sliceSource(child) });
    }
  }

  if (!paragraphSupported) {
    return opaqueBlock(sliceSource(node));
  }

  const {
    styleId,
    numbering,
    alignment,
    spacing,
    indentation,
    tabStops,
    keepNext,
    keepLines,
    outlineLevel,
    pageBreakBefore,
    widowControl,
    borders,
    shading,
    bidi,
    suppressLineNumbers,
    cnfStyle,
  } = properties;

  // NOTE(review): the on/off properties are included only when truthy, so an
  // explicit `false` (if the reader helper can return one) is not kept.
  return {
    type: "paragraph",
    styleId,
    ...(numbering ? { numbering } : {}),
    ...(alignment ? { alignment } : {}),
    ...(spacing ? { spacing } : {}),
    ...(indentation ? { indentation } : {}),
    ...(tabStops && tabStops.length > 0 ? { tabStops } : {}),
    ...(keepNext ? { keepNext } : {}),
    ...(keepLines ? { keepLines } : {}),
    ...(outlineLevel !== undefined ? { outlineLevel } : {}),
    ...(pageBreakBefore ? { pageBreakBefore } : {}),
    ...(widowControl ? { widowControl } : {}),
    ...(borders ? { borders } : {}),
    ...(shading ? { shading } : {}),
    ...(bidi ? { bidi } : {}),
    ...(suppressLineNumbers ? { suppressLineNumbers } : {}),
    ...(cnfStyle ? { cnfStyle } : {}),
    children,
    rawXml: sliceSource(node),
  };
}
|
|
394
|
+
|
|
395
|
+
/**
 * Parses a `tbl` element into a table node.
 *
 * `tblPr` supplies the raw properties markup, the style id and the table
 * look; `tblGrid` supplies the grid columns; each `tr` becomes a row. Other
 * children are ignored. Optional fields are omitted when their value is
 * falsy.
 *
 * @param node - The `tbl` element.
 * @param sourceXml - The XML string that node offsets index into.
 * @param relationshipMap - Relationships indexed by id.
 * @param relationships - The same relationships as a list.
 * @param mediaParts - Media parts handed through to nested parsers.
 * @param sourcePartPath - Path of the part being parsed.
 */
function parseTableElement(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedTableBlockNode {
  let tableStyleId: string | undefined;
  let look: TableLook | undefined;
  let tablePropertiesXml: string | undefined;
  let columns: number[] = [];
  const tableRows: ParsedTableRowNode[] = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    const childName = localName(child.name);
    if (childName === "tblPr") {
      tablePropertiesXml = sourceXml.slice(child.start, child.end);
      tableStyleId = readTableStyleId(child);
      look = readTableLook(child);
    } else if (childName === "tblGrid") {
      columns = readTableGridColumns(child);
    } else if (childName === "tr") {
      tableRows.push(
        parseTableRowElement(child, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath),
      );
    }
  }

  return {
    type: "table",
    ...(tableStyleId ? { styleId: tableStyleId } : {}),
    ...(look ? { tblLook: look } : {}),
    ...(tablePropertiesXml ? { propertiesXml: tablePropertiesXml } : {}),
    gridColumns: columns,
    rows: tableRows,
    rawXml: sourceXml.slice(node.start, node.end),
  };
}
|
|
440
|
+
|
|
441
|
+
/**
 * Parses a `tr` element into a table row node.
 *
 * `trPr` is kept as raw markup and each `tc` child becomes a cell. Other
 * children are ignored.
 *
 * @param node - The `tr` element.
 * @param sourceXml - The XML string that node offsets index into.
 * @param relationshipMap - Relationships indexed by id.
 * @param relationships - The same relationships as a list.
 * @param mediaParts - Media parts handed through to nested parsers.
 * @param sourcePartPath - Path of the part being parsed.
 */
function parseTableRowElement(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedTableRowNode {
  let rowPropertiesXml: string | undefined;
  const rowCells: ParsedTableCellNode[] = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    const childName = localName(child.name);
    if (childName === "trPr") {
      rowPropertiesXml = sourceXml.slice(child.start, child.end);
    } else if (childName === "tc") {
      rowCells.push(
        parseTableCellElement(child, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath),
      );
    }
  }

  return {
    type: "table_row",
    ...(rowPropertiesXml ? { propertiesXml: rowPropertiesXml } : {}),
    cells: rowCells,
    rawXml: sourceXml.slice(node.start, node.end),
  };
}
|
|
472
|
+
|
|
473
|
+
/**
 * Parses a `tc` element into a table cell node.
 *
 * `tcPr` supplies the raw properties markup, the grid span and the vertical
 * merge state. Every other element child is treated as block content
 * (paragraphs, nested tables, and so on) and parsed with `parseBodyChild`.
 * Optional fields are omitted when their value is falsy.
 *
 * @param node - The `tc` element.
 * @param sourceXml - The XML string that node offsets index into.
 * @param relationshipMap - Relationships indexed by id.
 * @param relationships - The same relationships as a list.
 * @param mediaParts - Media parts handed through to nested parsers.
 * @param sourcePartPath - Path of the part being parsed.
 */
function parseTableCellElement(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedTableCellNode {
  let cellPropertiesXml: string | undefined;
  let span: number | undefined;
  let merge: "restart" | "continue" | undefined;
  const blocks: ParsedBlockNode[] = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    if (localName(child.name) === "tcPr") {
      cellPropertiesXml = sourceXml.slice(child.start, child.end);
      span = readCellGridSpan(child);
      merge = readCellVerticalMerge(child);
      continue;
    }

    blocks.push(parseBodyChild(child, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath));
  }

  return {
    type: "table_cell",
    ...(cellPropertiesXml ? { propertiesXml: cellPropertiesXml } : {}),
    ...(span ? { gridSpan: span } : {}),
    ...(merge ? { verticalMerge: merge } : {}),
    children: blocks,
    rawXml: sourceXml.slice(node.start, node.end),
  };
}
|
|
513
|
+
|
|
514
|
+
/**
 * Parses a block-level `sdt` element.
 *
 * Without an `sdtContent` child the element is returned as an opaque block.
 * Otherwise each element child of the content is parsed with
 * `parseBodyChild`, and the properties are read from the first `sdtPr` child
 * (if any).
 *
 * @param node - The `sdt` element.
 * @param sourceXml - The XML string that node offsets index into.
 * @param relationshipMap - Relationships indexed by id.
 * @param relationships - The same relationships as a list.
 * @param mediaParts - Media parts handed through to nested parsers.
 * @param sourcePartPath - Path of the part being parsed.
 */
function parseSdtElement(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedBlockNode {
  const firstChildNamed = (name: string): XmlElementNode | undefined =>
    node.children.find(
      (child): child is XmlElementNode => child.type === "element" && localName(child.name) === name,
    );

  const propertiesNode = firstChildNamed("sdtPr");
  const contentNode = firstChildNamed("sdtContent");

  if (contentNode === undefined) {
    return { type: "opaque_block", rawXml: sourceXml.slice(node.start, node.end) };
  }

  const children: ParsedBlockNode[] = [];
  for (const child of contentNode.children) {
    if (child.type === "element") {
      children.push(parseBodyChild(child, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath));
    }
  }

  return {
    type: "sdt",
    properties: readSdtProperties(propertiesNode, sourceXml),
    children,
    rawXml: sourceXml.slice(node.start, node.end),
  };
}
|
|
547
|
+
|
|
548
|
+
/**
 * Parses a block-level `customXml` element.
 *
 * When neither a `uri` nor an `element` attribute yields a truthy value the
 * element is returned as an opaque block. Otherwise every element child
 * except `customXmlPr` is parsed with `parseBodyChild`.
 *
 * @param node - The `customXml` element.
 * @param sourceXml - The XML string that node offsets index into.
 * @param relationshipMap - Relationships indexed by id.
 * @param relationships - The same relationships as a list.
 * @param mediaParts - Media parts handed through to nested parsers.
 * @param sourcePartPath - Path of the part being parsed.
 */
function parseCustomXmlElement(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedBlockNode {
  const uri = readOptionalAttribute(node, "uri");
  const element = readOptionalAttribute(node, "element");

  const hasIdentity = Boolean(uri) || Boolean(element);
  if (!hasIdentity) {
    return { type: "opaque_block", rawXml: sourceXml.slice(node.start, node.end) };
  }

  const children: ParsedBlockNode[] = [];
  for (const child of node.children) {
    if (child.type !== "element" || localName(child.name) === "customXmlPr") {
      continue;
    }
    children.push(parseBodyChild(child, sourceXml, relationshipMap, relationships, mediaParts, sourcePartPath));
  }

  return {
    type: "custom_xml",
    ...(uri ? { uri } : {}),
    ...(element ? { element } : {}),
    children,
    rawXml: sourceXml.slice(node.start, node.end),
  };
}
|
|
580
|
+
|
|
581
|
+
/**
 * Parses an `altChunk` element.
 *
 * The element becomes an `alt_chunk` node when its `id` attribute yields a
 * truthy relationship id, and an opaque block otherwise. Both variants carry
 * the element's raw markup.
 *
 * @param node - The `altChunk` element.
 * @param sourceXml - The XML string that node offsets index into.
 */
function parseAltChunkElement(
  node: XmlElementNode,
  sourceXml: string,
): ParsedBlockNode {
  const relationshipId = readOptionalAttribute(node, "id");
  const rawXml = sourceXml.slice(node.start, node.end);

  return relationshipId
    ? { type: "alt_chunk", relationshipId, rawXml }
    : { type: "opaque_block", rawXml };
}
|
|
599
|
+
|
|
600
|
+
/**
 * Local names of `sdtPr` children that describe the control but never denote
 * its type. In the WordprocessingML schema these may all appear before the
 * element that identifies the kind of control (`text`, `richText`, `date`,
 * ...), so they must be skipped when looking for the type marker.
 */
const SDT_NON_TYPE_PROPERTY_NAMES: ReadonlySet<string> = new Set([
  "rPr",
  "id",
  "placeholder",
  "temporary",
  "showingPlcHdr",
  "dataBinding",
  "label",
  "tabIndex",
]);

/**
 * Reads the properties of a structured document tag from its `sdtPr` element.
 *
 * `alias`, `tag` and `lock` are taken from the `val` attribute of the
 * same-named children. `sdtType` is the local name of the first remaining
 * child that is not a known non-type property. Previously only `id`,
 * `placeholder` and `showingPlcHdr` were skipped, so a control whose `sdtPr`
 * starts with run properties was reported as type `"rPr"`.
 *
 * NOTE(review): vendor extension children (for example appearance or colour
 * settings) are not skipped here; extend the set above if they are found to
 * precede the type element in real documents.
 *
 * @param node - The `sdtPr` element, or `undefined` when the tag has none.
 * @param sourceXml - The XML string that node offsets index into.
 * @returns An empty object when `node` is `undefined`; otherwise the parsed
 *   properties, always including the raw `propertiesXml`.
 */
function readSdtProperties(
  node: XmlElementNode | undefined,
  sourceXml: string,
): ParsedSdtNode["properties"] {
  if (!node) {
    return {};
  }

  const properties: ParsedSdtNode["properties"] = {
    propertiesXml: sourceXml.slice(node.start, node.end),
  };

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    const name = localName(child.name);
    switch (name) {
      case "alias":
        properties.alias = readOptionalAttribute(child, "val");
        break;
      case "tag":
        properties.tag = readOptionalAttribute(child, "val");
        break;
      case "lock":
        properties.lock = readOptionalAttribute(child, "val");
        break;
      default:
        // The first child that is not a generic property is the type marker.
        if (!properties.sdtType && !SDT_NON_TYPE_PROPERTY_NAMES.has(name)) {
          properties.sdtType = name;
        }
        break;
    }
  }

  return properties;
}
|
|
637
|
+
|
|
638
|
+
/**
 * Returns the style id referenced by the first `tblStyle` child that has a
 * non-empty `w:val` (or unprefixed `val`) attribute.
 *
 * @param node - Element whose direct children are searched.
 * @returns The style id, or `undefined` when no such child exists.
 */
function readTableStyleId(node: XmlElementNode): string | undefined {
  for (const entry of node.children) {
    if (entry.type === "element" && localName(entry.name) === "tblStyle") {
      const candidate = entry.attributes["w:val"] ?? entry.attributes.val;
      if (candidate) {
        return candidate;
      }
    }
  }

  return undefined;
}
|
|
646
|
+
|
|
647
|
+
/**
 * Reads the `tblLook` child of `node` into a `TableLook` object.
 *
 * `val` is copied as-is when non-empty. Each of the six flag attributes
 * (`firstRow`, `lastRow`, `firstColumn`, `lastColumn`, `noHBand`, `noVBand`)
 * is looked up with the `w:` prefix first and without it second; a present
 * attribute is `false` only for the literal values "0", "false" and "off".
 *
 * @param node - Element whose direct children are searched for `tblLook`.
 * @returns The collected look settings, or `undefined` when there is no
 *          `tblLook` child or it yields no settings.
 */
function readTableLook(node: XmlElementNode): TableLook | undefined {
  let lookElement: XmlElementNode | undefined;
  for (const child of node.children) {
    if (child.type === "element" && localName(child.name) === "tblLook") {
      lookElement = child;
      break;
    }
  }
  if (lookElement === undefined) {
    return undefined;
  }

  const attrs = lookElement.attributes;
  const look: TableLook = {};

  const rawVal = attrs["w:val"] ?? attrs.val;
  if (rawVal) {
    look.val = rawVal;
  }

  const flagKeys = [
    "firstRow",
    "lastRow",
    "firstColumn",
    "lastColumn",
    "noHBand",
    "noVBand",
  ] as const;
  for (const key of flagKeys) {
    const raw = attrs[`w:${key}`] ?? attrs[key];
    if (raw === undefined) {
      continue;
    }
    look[key] = !(raw === "0" || raw === "false" || raw === "off");
  }

  return Object.keys(look).length > 0 ? look : undefined;
}
|
|
677
|
+
|
|
678
|
+
/**
 * Collects one width per `gridCol` child, in document order.
 *
 * The width comes from `w:w` (or unprefixed `w`), parsed as a base-10 integer.
 * A missing, unparsable or non-positive width is reported as 0.
 *
 * @param node - Element whose direct children are searched for `gridCol`.
 * @returns One number per `gridCol` child; empty when there are none.
 */
function readTableGridColumns(node: XmlElementNode): number[] {
  const widths: number[] = [];

  for (const child of node.children) {
    if (child.type !== "element" || localName(child.name) !== "gridCol") {
      continue;
    }
    const width = Number.parseInt(child.attributes["w:w"] ?? child.attributes.w ?? "0", 10);
    widths.push(Number.isFinite(width) && width > 0 ? width : 0);
  }

  return widths;
}
|
|
687
|
+
|
|
688
|
+
/**
 * Reports whether a table's raw XML contains markup that the parsed table
 * path cannot round-trip safely yet, for example:
 * - revision markup (tracked changes inside cells),
 * - hyperlinks (the original relationship IDs would be lost),
 * - comment ranges, bookmarks and other annotation markup.
 *
 * A table for which this returns `true` is meant to stay opaque until the
 * table editing path supports the feature in question.
 *
 * @param rawXml - Raw XML text of the table.
 * @returns `true` when any of the listed `w:`-prefixed start tags occurs.
 */
function tableRequiresOpaquePreservation(rawXml: string): boolean {
  // Only tables made of simple content (plain text, basic formatting) are
  // parsed for now. The alternation below is the list of element names that
  // force opaque handling; it is expected to shrink as coverage grows.
  const complexMarkup = /<w:(ins|del|rPrChange|pPrChange|tblPrChange|trPrChange|tcPrChange|sectPrChange|cellIns|cellDel|cellMerge|hyperlink|commentRangeStart|commentRangeEnd|commentReference|bookmarkStart|bookmarkEnd|rStyle|pict|fldChar|fldSimple|smartTag|gridAfter|gridBefore|hideMark|tblHeader|tblCellSpacing|bCs)\b/;
  return rawXml.search(complexMarkup) !== -1;
}
|
|
704
|
+
|
|
705
|
+
/**
 * Reads the column span declared by the first `gridSpan` child of `node`.
 *
 * @param node - Element whose direct children are searched for `gridSpan`.
 * @returns The span when it parses to an integer greater than 1; otherwise
 *          `undefined` (no `gridSpan`, missing/unparsable value, or span <= 1).
 */
function readCellGridSpan(node: XmlElementNode): number | undefined {
  for (const child of node.children) {
    if (child.type !== "element" || localName(child.name) !== "gridSpan") {
      continue;
    }

    // Only the first gridSpan child is consulted.
    const span = Number.parseInt(child.attributes["w:val"] ?? child.attributes.val ?? "0", 10);
    if (Number.isFinite(span) && span > 1) {
      return span;
    }
    return undefined;
  }

  return undefined;
}
|
|
714
|
+
|
|
715
|
+
/**
 * Reads the vertical-merge state from the first `vMerge` child of `node`.
 *
 * @param node - Element whose direct children are searched for `vMerge`.
 * @returns `"restart"` when the `w:val` / `val` attribute equals "restart"
 *          (case-insensitive); `"continue"` for any other or missing value;
 *          `undefined` when there is no `vMerge` child.
 */
function readCellVerticalMerge(node: XmlElementNode): "restart" | "continue" | undefined {
  const mergeElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "vMerge",
  );
  if (mergeElement === undefined) {
    return undefined;
  }

  const { attributes } = mergeElement;
  const mode = attributes["w:val"] ?? attributes.val;
  if (mode !== undefined && mode.toLowerCase() === "restart") {
    return "restart";
  }
  // A vMerge without a value is treated the same as an explicit "continue".
  return "continue";
}
|
|
723
|
+
|
|
724
|
+
/**
 * Reads the alignment from the first `jc` child of `node`.
 *
 * The `w:val` / `val` attribute is lower-cased and accepted only when it is
 * one of "left", "center", "right", "both" or "distribute".
 *
 * @param node - Element whose direct children are searched for `jc`.
 * @returns The normalised alignment, or `undefined` when absent or unrecognised.
 */
function readParagraphAlignment(node: XmlElementNode): ParsedParagraphNode["alignment"] {
  const jcElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "jc",
  );
  if (!jcElement) {
    return undefined;
  }

  const normalized = (jcElement.attributes["w:val"] ?? jcElement.attributes.val ?? "").toLowerCase();
  switch (normalized) {
    case "left":
    case "center":
    case "right":
    case "both":
    case "distribute":
      return normalized;
    default:
      return undefined;
  }
}
|
|
735
|
+
|
|
736
|
+
/**
 * Reads the first `spacing` child of `node` into a `ParagraphSpacing` object.
 *
 * `before`, `after` and `line` are parsed as base-10 integers and dropped when
 * unparsable. `lineRule` is matched case-insensitively against "auto", "exact"
 * and "atLeast"; any other value is ignored. Each attribute is looked up with
 * the `w:` prefix first and without it second.
 *
 * @param node - Element whose direct children are searched for `spacing`.
 * @returns The collected values, or `undefined` when there is no `spacing`
 *          child or none of its attributes produced a value.
 */
function readParagraphSpacing(node: XmlElementNode): ParagraphSpacing | undefined {
  const spacingElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "spacing",
  );
  if (!spacingElement) {
    return undefined;
  }

  const attrs = spacingElement.attributes;
  const readIntegerAttribute = (name: string): number | undefined => {
    const raw = attrs[`w:${name}`] ?? attrs[name];
    if (raw === undefined) {
      return undefined;
    }
    const parsed = Number.parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const result: ParagraphSpacing = {};

  const beforeValue = readIntegerAttribute("before");
  if (beforeValue !== undefined) {
    result.before = beforeValue;
  }
  const afterValue = readIntegerAttribute("after");
  if (afterValue !== undefined) {
    result.after = afterValue;
  }
  const lineValue = readIntegerAttribute("line");
  if (lineValue !== undefined) {
    result.line = lineValue;
  }

  const rule = (attrs["w:lineRule"] ?? attrs.lineRule)?.toLowerCase();
  if (rule === "auto" || rule === "exact") {
    result.lineRule = rule;
  } else if (rule === "atleast") {
    // Restore the canonical camel-case spelling after lower-casing.
    result.lineRule = "atLeast";
  }

  const hasAnyValue =
    result.before !== undefined ||
    result.after !== undefined ||
    result.line !== undefined ||
    result.lineRule !== undefined;
  return hasAnyValue ? result : undefined;
}
|
|
779
|
+
|
|
780
|
+
/**
 * Reads the first `ind` child of `node` into a `ParagraphIndentation` object.
 *
 * `left`, `right`, `firstLine` and `hanging` are each looked up with the `w:`
 * prefix first and without it second, parsed as base-10 integers, and dropped
 * when unparsable.
 *
 * @param node - Element whose direct children are searched for `ind`.
 * @returns The collected values, or `undefined` when there is no `ind` child
 *          or none of its attributes produced a value.
 */
function readParagraphIndentation(node: XmlElementNode): ParagraphIndentation | undefined {
  const indElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "ind",
  );
  if (!indElement) {
    return undefined;
  }

  const attrs = indElement.attributes;
  const result: ParagraphIndentation = {};
  const keys = ["left", "right", "firstLine", "hanging"] as const;
  let hasAnyValue = false;

  for (const key of keys) {
    const raw = attrs[`w:${key}`] ?? attrs[key];
    if (raw === undefined) {
      continue;
    }
    const parsed = Number.parseInt(raw, 10);
    if (Number.isFinite(parsed)) {
      result[key] = parsed;
      hasAnyValue = true;
    }
  }

  return hasAnyValue ? result : undefined;
}
|
|
819
|
+
|
|
820
|
+
/**
 * Reads the `tab` entries of the first `tabs` child of `node`.
 *
 * A `tab` without a parsable integer `pos` is skipped. The alignment is the
 * lower-cased `val` when it is one of "left", "center", "right", "decimal",
 * "bar" or "clear", and "left" otherwise. The leader is included only when it
 * is "dot", "hyphen", "underscore", "heavy" or "middleDot" (matched
 * case-insensitively); "none" and unrecognised leaders are omitted.
 *
 * @param node - Element whose direct children are searched for `tabs`.
 * @returns The tab stops in document order, or `undefined` when there is no
 *          `tabs` child or it yields no valid stop.
 */
function readParagraphTabStops(node: XmlElementNode): TabStop[] | undefined {
  const tabsElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "tabs",
  );
  if (!tabsElement) {
    return undefined;
  }

  const stops: TabStop[] = [];

  for (const child of tabsElement.children) {
    if (child.type !== "element" || localName(child.name) !== "tab") {
      continue;
    }

    const attrs = child.attributes;
    const rawPosition = attrs["w:pos"] ?? attrs.pos;
    if (rawPosition === undefined) {
      continue;
    }
    const position = Number.parseInt(rawPosition, 10);
    if (!Number.isFinite(position)) {
      continue;
    }

    const kind = (attrs["w:val"] ?? attrs.val ?? "left").toLowerCase();
    let align: TabStop["align"];
    switch (kind) {
      case "left":
      case "center":
      case "right":
      case "decimal":
      case "bar":
      case "clear":
        align = kind;
        break;
      default:
        align = "left";
        break;
    }

    const leaderName = (attrs["w:leader"] ?? attrs.leader ?? "none").toLowerCase();
    let leader: TabStop["leader"] | undefined;
    switch (leaderName) {
      case "dot":
      case "hyphen":
      case "underscore":
      case "heavy":
        leader = leaderName;
        break;
      case "middledot":
        // Restore the canonical camel-case spelling after lower-casing.
        leader = "middleDot";
        break;
      default:
        // "none" and unrecognised leaders are left out of the result.
        leader = undefined;
        break;
    }

    stops.push({
      position,
      align,
      ...(leader ? { leader } : {}),
    });
  }

  return stops.length > 0 ? stops : undefined;
}
|
|
863
|
+
|
|
864
|
+
/**
 * Reads an on/off style property from the first child of `node` whose local
 * name equals `name`.
 *
 * @param node - Element whose direct children are searched.
 * @param name - Local name of the property element to look for.
 * @returns `true` when the element exists and its `w:val` / `val` attribute is
 *          absent or anything other than "false", "0" or "off"
 *          (case-insensitive); `undefined` in every other case. This function
 *          never returns `false`.
 */
function readOnOffParagraphProperty(node: XmlElementNode, name: string): boolean | undefined {
  const toggleElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === name,
  );
  if (toggleElement === undefined) {
    return undefined;
  }

  const rawVal = toggleElement.attributes["w:val"] ?? toggleElement.attributes.val ?? "true";
  const switchedOff = ["false", "0", "off"].includes(rawVal.toLowerCase());
  return switchedOff ? undefined : true;
}
|
|
872
|
+
|
|
873
|
+
/**
 * Reads the outline level from the first `outlineLvl` child of `node`.
 *
 * @param node - Element whose direct children are searched for `outlineLvl`.
 * @returns The `w:val` / `val` attribute parsed as a base-10 integer, or
 *          `undefined` when the element or attribute is missing or the value
 *          does not parse to a finite number.
 */
function readParagraphOutlineLevel(node: XmlElementNode): number | undefined {
  const outlineElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "outlineLvl",
  );

  const raw = outlineElement?.attributes["w:val"] ?? outlineElement?.attributes.val;
  if (raw === undefined) {
    return undefined;
  }

  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
|
|
883
|
+
|
|
884
|
+
/**
 * Reads the borders declared under the first `pBdr` child of `node`.
 *
 * For each of `top`, `left`, `bottom`, `right`, `bar` and `between`, the first
 * matching child of `pBdr` is passed to `readBorder`; sides for which that
 * yields nothing are left out.
 *
 * @param node - Element whose direct children are searched for `pBdr`.
 * @returns The collected borders, or `undefined` when there is no `pBdr` child
 *          or no side produced a border.
 */
function readParagraphBorders(node: XmlElementNode): ParagraphBorders | undefined {
  const container = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "pBdr",
  );
  if (!container) {
    return undefined;
  }

  const sides = ["top", "left", "bottom", "right", "bar", "between"] as const;
  const borders: ParagraphBorders = {};

  for (const side of sides) {
    const sideElement = container.children.find(
      (child): child is XmlElementNode => child.type === "element" && localName(child.name) === side,
    );
    const border = sideElement ? readBorder(sideElement) : undefined;
    if (border) {
      borders[side] = border;
    }
  }

  return Object.keys(borders).length > 0 ? borders : undefined;
}
|
|
914
|
+
|
|
915
|
+
/**
 * Reads the first `shd` child of `node` into a `ParagraphShading` object.
 *
 * `fill`, `color` and `val` are copied verbatim when non-empty, each looked up
 * with the `w:` prefix first and without it second.
 *
 * @param node - Element whose direct children are searched for `shd`.
 * @returns The collected values, or `undefined` when there is no `shd` child
 *          or it carries none of the three attributes.
 */
function readParagraphShading(node: XmlElementNode): ParagraphShading | undefined {
  const shdElement = node.children.find(
    (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "shd",
  );
  if (!shdElement) {
    return undefined;
  }

  const attrs = shdElement.attributes;
  const result: ParagraphShading = {};
  for (const key of ["fill", "color", "val"] as const) {
    const value = attrs[`w:${key}`] ?? attrs[key];
    if (value) {
      result[key] = value;
    }
  }

  return Object.keys(result).length > 0 ? result : undefined;
}
|
|
932
|
+
|
|
933
|
+
/**
 * Returns the `w:val` (or unprefixed `val`) attribute of the first `cnfStyle`
 * child of `node`.
 *
 * @param node - Element whose direct children are searched for `cnfStyle`.
 * @returns The attribute value, or `undefined` when the element or the
 *          attribute is missing.
 */
function readParagraphCnfStyle(node: XmlElementNode): string | undefined {
  for (const child of node.children) {
    if (child.type === "element" && localName(child.name) === "cnfStyle") {
      // Only the first cnfStyle child is consulted, even if it has no value.
      return child.attributes["w:val"] ?? child.attributes.val;
    }
  }
  return undefined;
}
|
|
939
|
+
|
|
940
|
+
/**
 * Reads a single border definition from the attributes of `node`.
 *
 * `val` and `color` are copied when non-empty (as `value` and `color`); `sz`
 * and `space` are parsed as base-10 integers (as `size` and `space`) and
 * dropped when unparsable. Each attribute is looked up with the `w:` prefix
 * first and without it second.
 *
 * @param node - The border element.
 * @returns The border, or `undefined` when no attribute produced a value.
 */
function readBorder(node: XmlElementNode): ParagraphBorders[keyof ParagraphBorders] {
  const attrs = node.attributes;
  const pick = (name: string): string | undefined => attrs[`w:${name}`] ?? attrs[name];
  const toInteger = (raw: string | undefined): number | undefined => {
    if (raw === undefined) {
      return undefined;
    }
    const parsed = Number.parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const result: NonNullable<ParagraphBorders[keyof ParagraphBorders]> = {};

  const lineStyle = pick("val");
  if (lineStyle) {
    result.value = lineStyle;
  }
  const lineSize = toInteger(pick("sz"));
  if (lineSize !== undefined) {
    result.size = lineSize;
  }
  const lineSpace = toInteger(pick("space"));
  if (lineSpace !== undefined) {
    result.space = lineSpace;
  }
  const lineColor = pick("color");
  if (lineColor) {
    result.color = lineColor;
  }

  return Object.keys(result).length > 0 ? result : undefined;
}
|
|
958
|
+
|
|
959
|
+
/**
 * Returns the style id referenced by the first `pStyle` child that has a
 * non-empty `w:val` (or unprefixed `val`) attribute.
 *
 * @param node - Element whose direct children are searched.
 * @returns The style id, or `undefined` when no such child exists.
 */
function readParagraphStyleId(node: XmlElementNode): string | undefined {
  for (const entry of node.children) {
    if (entry.type === "element" && localName(entry.name) === "pStyle") {
      const candidate = entry.attributes["w:val"] ?? entry.attributes.val;
      if (candidate) {
        return candidate;
      }
      // A pStyle without a usable value does not end the search.
    }
  }

  return undefined;
}
|
|
973
|
+
|
|
974
|
+
/**
 * Reads the numbering reference from the first `numPr` child of `node`.
 *
 * Both parts are required: `numId` must have a non-empty `w:val` / `val`, and
 * `ilvl` must have a value consisting of an optional minus sign followed by
 * digits. The instance id is passed through `toCanonicalNumberingInstanceId`.
 *
 * @param node - Element whose direct children are searched for `numPr`.
 * @returns The numbering instance id and level, or `undefined` when `numPr`
 *          is missing or either part is missing or malformed.
 */
function readParagraphNumbering(
  node: XmlElementNode,
): ParsedParagraphNode["numbering"] | undefined {
  const numPr = node.children.find(
    (child): child is XmlElementNode =>
      child.type === "element" && localName(child.name) === "numPr",
  );
  if (!numPr) {
    return undefined;
  }

  const readChildVal = (childName: string): string | undefined => {
    const element = numPr.children.find(
      (child): child is XmlElementNode =>
        child.type === "element" && localName(child.name) === childName,
    );
    return element?.attributes["w:val"] ?? element?.attributes.val;
  };

  const rawLevel = readChildVal("ilvl");
  const rawInstanceId = readChildVal("numId");

  if (!rawInstanceId) {
    return undefined;
  }
  if (rawLevel === undefined || !/^-?\d+$/.test(rawLevel)) {
    return undefined;
  }

  return {
    numberingInstanceId: toCanonicalNumberingInstanceId(rawInstanceId),
    level: Number.parseInt(rawLevel, 10),
  };
}
|
|
1004
|
+
|
|
1005
|
+
/**
 * Parses a run element into inline nodes.
 *
 * If `readRunMarks` reports the run's properties as unsupported, the whole run
 * is returned as a single `opaque_inline`. Otherwise each child element is
 * translated in document order:
 * - `rPr` and `commentReference` produce nothing;
 * - `t` becomes a text node carrying the run's marks (when there are any);
 * - `tab` becomes a tab node;
 * - `sym` becomes whatever `parseSymbolNode` returns, or an opaque node for
 *   the child when it returns nothing;
 * - `br` becomes a column break, a hard break, or an opaque node for the child;
 * - `drawing` becomes one image node per entry returned by
 *   `parseInlineMediaXml`, or an opaque node for the child when it returns none;
 * - `lastRenderedPageBreak` and `proofErr` are kept as opaque child nodes;
 * - anything else is kept as an opaque child node and flagged as unsupported.
 *
 * When an unsupported child was seen and every produced node is opaque, the
 * result collapses into one `opaque_inline` holding the whole run.
 *
 * @param node - The run element.
 * @param sourceXml - The XML text that `start` / `end` offsets index into.
 * @param relationships - Forwarded to `parseInlineMediaXml`.
 * @param mediaParts - Forwarded to `parseInlineMediaXml`.
 * @param sourcePartPath - Forwarded to `parseInlineMediaXml`.
 * @returns The inline nodes for this run, in document order.
 */
function parseRun(
  node: XmlElementNode,
  sourceXml: string,
  relationships: readonly OpcRelationship[],
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
  sourcePartPath: string,
): ParsedInlineNode[] {
  const marksResult = readRunMarks(node, sourceXml);
  if (!marksResult.supported) {
    return [
      {
        type: "opaque_inline",
        rawXml: sourceXml.slice(node.start, node.end),
      },
    ];
  }

  const marks = marksResult.marks;
  const result: ParsedInlineNode[] = [];
  let encounteredUnsupportedChild = false;

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    switch (localName(child.name)) {
      case "rPr":
        break;
      case "t": {
        const text = child.children
          .filter((entry): entry is XmlTextNode => entry.type === "text")
          .map((entry) => entry.text)
          .join("");
        result.push({
          type: "text",
          text,
          ...(marks.length > 0 ? { marks } : {}),
        });
        break;
      }
      case "tab":
        result.push({ type: "tab" });
        break;
      case "sym": {
        const symbol = parseSymbolNode(child, marks);
        if (!symbol) {
          encounteredUnsupportedChild = true;
          result.push({
            type: "opaque_inline",
            rawXml: sourceXml.slice(child.start, child.end),
          });
          break;
        }
        result.push(symbol);
        break;
      }
      case "br":
        if (isColumnBreak(child)) {
          result.push({ type: "column_break" });
        } else if (isSimpleLineBreak(child)) {
          result.push({ type: "hard_break" });
        } else {
          result.push({
            type: "opaque_inline",
            rawXml: sourceXml.slice(child.start, child.end),
          });
        }
        break;
      case "drawing": {
        const parsedMedia = parseInlineMediaXml(
          sourceXml.slice(child.start, child.end),
          relationships,
          mediaParts,
          sourcePartPath,
        );
        if (parsedMedia.length === 0) {
          encounteredUnsupportedChild = true;
          // Preserve only the drawing itself. Capturing the whole run here
          // would embed sibling content (e.g. text) that is also emitted as
          // its own node. If the drawing is the run's only content, the
          // collapse step below still returns the whole run as opaque.
          result.push({
            type: "opaque_inline",
            rawXml: sourceXml.slice(child.start, child.end),
          });
          break;
        }

        // When the drawing is the run's only non-rPr child, the placement XML
        // is the whole run; otherwise it is just the drawing element.
        const semanticChildren = node.children.filter(
          (entry): entry is XmlElementNode =>
            entry.type === "element" && localName(entry.name) !== "rPr",
        );
        const placementXml =
          semanticChildren.length === 1
            ? sourceXml.slice(node.start, node.end)
            : sourceXml.slice(child.start, child.end);

        result.push(
          ...parsedMedia.map((media) => ({
            type: "image" as const,
            mediaId: media.mediaId,
            relationshipId: media.relationshipId,
            packagePartName: media.packagePartName,
            contentType: media.contentType,
            filename: media.filename,
            ...(media.altText ? { altText: media.altText } : {}),
            placementXml,
            ...(media.display ? { display: media.display } : {}),
            ...(media.floating ? { floating: media.floating } : {}),
          })),
        );
        break;
      }
      case "commentReference":
        break;
      case "lastRenderedPageBreak":
      case "proofErr":
        // Kept verbatim, but not counted as an unsupported child.
        result.push({
          type: "opaque_inline",
          rawXml: sourceXml.slice(child.start, child.end),
        });
        break;
      default:
        encounteredUnsupportedChild = true;
        result.push({
          type: "opaque_inline",
          rawXml: sourceXml.slice(child.start, child.end),
        });
        break;
    }
  }

  // Nothing in the run was understood: keep the run (including its rPr) whole.
  if (encounteredUnsupportedChild && result.every((child) => child.type === "opaque_inline")) {
    return [
      {
        type: "opaque_inline",
        rawXml: sourceXml.slice(node.start, node.end),
      },
    ];
  }

  return result;
}
|
|
1145
|
+
|
|
1146
|
+
/**
 * Parses the content of a revision container element into inline nodes.
 *
 * Only `r` and `hyperlink` child elements are translated. If a run is reported
 * as unsupported, a hyperlink comes back opaque, or any other element is
 * encountered (comment ranges, bookmarks, permission ranges, `proofErr`,
 * `lastRenderedPageBreak`, or anything unknown), the whole container is
 * returned as a single `opaque_inline` node instead.
 *
 * Deleted text is accepted only when the container's local name is `del`.
 *
 * @param node - The revision container element.
 * @param sourceXml - The XML text that `node.start` / `node.end` index into.
 * @param relationshipMap - Forwarded to `parseHyperlink`.
 * @returns The parsed inline nodes, or one opaque node for the container.
 */
function parseRevisionContainer(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
): ParsedInlineNode[] {
  const contentOptions = {
    allowDeletedText: localName(node.name) === "del",
    preserveUnsupportedReviewMarkup: true,
  };
  const wholeContainerAsOpaque = (): ParsedInlineNode[] => [
    {
      type: "opaque_inline",
      rawXml: sourceXml.slice(node.start, node.end),
    },
  ];

  const collected: ParsedInlineNode[] = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    const childName = localName(child.name);
    if (childName === "r") {
      const run = parseRunContentOnly(child, sourceXml, contentOptions);
      if (!run.supported) {
        return wholeContainerAsOpaque();
      }
      collected.push(...run.nodes);
    } else if (childName === "hyperlink") {
      const link = parseHyperlink(child, sourceXml, relationshipMap, contentOptions);
      if (link.type === "opaque_inline") {
        return wholeContainerAsOpaque();
      }
      collected.push(link);
    } else {
      // Annotation markers and every other element make the container opaque.
      return wholeContainerAsOpaque();
    }
  }

  return collected;
}
|
|
1218
|
+
|
|
1219
|
+
/**
 * Parses a hyperlink element into a hyperlink node.
 *
 * The target is resolved from the `r:id` / `id` attribute when present: the
 * relationship must exist in `relationshipMap`, be of
 * `HYPERLINK_RELATIONSHIP_TYPE` and have `targetMode === "external"`. Only
 * when no relationship id is present is the `w:anchor` / `anchor` attribute
 * used, producing `#<anchor>`.
 *
 * The element is returned as an `opaque_inline` node when no target could be
 * resolved, when it has a child element other than `r`, or when
 * `parseRunContentOnly` reports a run as unsupported.
 *
 * @param node - The hyperlink element.
 * @param sourceXml - The XML text that `node.start` / `node.end` index into.
 * @param relationshipMap - Relationships keyed by relationship id.
 * @param options - Flags forwarded to `parseRunContentOnly` for each run.
 * @returns A hyperlink node, or an opaque fallback holding the raw XML.
 */
function parseHyperlink(
  node: XmlElementNode,
  sourceXml: string,
  relationshipMap: Map<string, OpcRelationship>,
  options: {
    allowDeletedText?: boolean;
    preserveUnsupportedReviewMarkup?: boolean;
  } = {},
): ParsedHyperlinkNode | ParsedOpaqueInlineNode {
  const asOpaque = (): ParsedOpaqueInlineNode => ({
    type: "opaque_inline",
    rawXml: sourceXml.slice(node.start, node.end),
  });

  const relationshipId = node.attributes["r:id"] ?? node.attributes.id;
  const anchor = node.attributes["w:anchor"] ?? node.attributes.anchor;

  let href: string | undefined;
  if (!relationshipId) {
    if (anchor) {
      href = `#${anchor}`;
    }
  } else {
    // A relationship id takes precedence; the anchor is not consulted even if
    // the relationship turns out to be unusable.
    const relationship = relationshipMap.get(relationshipId);
    if (
      relationship &&
      relationship.type === HYPERLINK_RELATIONSHIP_TYPE &&
      relationship.targetMode === "external"
    ) {
      href = relationship.target;
    }
  }

  if (!href) {
    return asOpaque();
  }

  const { allowDeletedText, preserveUnsupportedReviewMarkup } = options;
  const children: Array<ParsedTextNode | ParsedBreakNode | ParsedTabNode> = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }
    if (localName(child.name) !== "r") {
      return asOpaque();
    }

    const run = parseRunContentOnly(child, sourceXml, {
      allowDeletedText,
      preserveUnsupportedReviewMarkup,
    });
    if (!run.supported) {
      return asOpaque();
    }

    children.push(...run.nodes);
  }

  return {
    type: "hyperlink",
    href,
    children,
    rawXml: sourceXml.slice(node.start, node.end),
  };
}
|
|
1287
|
+
|
|
1288
|
+
/**
 * Parses a run into simple content nodes, giving up as soon as anything
 * unsupported is found.
 *
 * The result is `{ nodes: [], supported: false }` when `readRunMarks` rejects
 * the run's properties, or when a child element is:
 * - `delText` / `delInstrText` while `options.allowDeletedText` is falsy,
 * - a `sym` for which `parseSymbolNode` returns nothing,
 * - a `br` that is neither a column break nor a simple line break,
 * - `commentReference`, `lastRenderedPageBreak` or `proofErr` while
 *   `options.preserveUnsupportedReviewMarkup` is truthy (these are skipped
 *   otherwise),
 * - any element not listed here.
 *
 * `rPr` is skipped; `t` and accepted deleted text become text nodes carrying
 * the run's marks (when there are any); `tab` becomes a tab node.
 *
 * @param node - The run element.
 * @param _sourceXml - XML text forwarded to `readRunMarks`.
 * @param options - Controls deleted-text and review-markup handling.
 * @returns The parsed nodes together with a `supported` flag.
 */
function parseRunContentOnly(
  node: XmlElementNode,
  _sourceXml: string,
  options: {
    allowDeletedText?: boolean;
    preserveUnsupportedReviewMarkup?: boolean;
  } = {},
): RunParseResult {
  const unsupported = (): RunParseResult => ({ nodes: [], supported: false });

  const marksResult = readRunMarks(node, _sourceXml);
  if (!marksResult.supported) {
    return unsupported();
  }

  const marks = marksResult.marks;
  const markProps = marks.length > 0 ? { marks } : {};
  const textOf = (element: XmlElementNode): string =>
    element.children
      .filter((entry): entry is XmlTextNode => entry.type === "text")
      .map((entry) => entry.text)
      .join("");

  const nodes: Array<ParsedTextNode | ParsedBreakNode | ParsedColumnBreakNode | ParsedTabNode | ParsedSymbolNode> = [];

  for (const child of node.children) {
    if (child.type !== "element") {
      continue;
    }

    const childName = localName(child.name);

    if (childName === "rPr") {
      continue;
    }

    if (childName === "t") {
      nodes.push({ type: "text", text: textOf(child), ...markProps });
      continue;
    }

    if (childName === "delText" || childName === "delInstrText") {
      if (!options.allowDeletedText) {
        return unsupported();
      }
      nodes.push({ type: "text", text: textOf(child), ...markProps });
      continue;
    }

    if (childName === "tab") {
      nodes.push({ type: "tab" });
      continue;
    }

    if (childName === "sym") {
      const symbol = parseSymbolNode(child, marks);
      if (!symbol) {
        return unsupported();
      }
      nodes.push(symbol);
      continue;
    }

    if (childName === "br") {
      if (isColumnBreak(child)) {
        nodes.push({ type: "column_break" });
      } else if (isSimpleLineBreak(child)) {
        nodes.push({ type: "hard_break" });
      } else {
        return unsupported();
      }
      continue;
    }

    if (
      childName === "commentReference" ||
      childName === "lastRenderedPageBreak" ||
      childName === "proofErr"
    ) {
      // Dropped silently unless the caller asked for review markup to be
      // preserved, in which case the run is reported as unsupported.
      if (options.preserveUnsupportedReviewMarkup) {
        return unsupported();
      }
      continue;
    }

    return unsupported();
  }

  return { nodes, supported: true };
}
|
|
1373
|
+
|
|
1374
|
+
/**
 * Derives the text marks of a run from its `rPr` child.
 *
 * A run without `rPr` has no marks and is supported. A run whose `rPr` has any
 * child listed in `DISALLOWED_RUN_PROPERTY_NAMES` is reported as unsupported
 * with no marks. Otherwise marks are collected in a fixed order: bold, italic,
 * underline, strikethrough, double strikethrough, vanish, language, background
 * colour, character spacing, kerning, position, emboss, imprint, shadow and
 * text fill.
 *
 * @param node - The run element.
 * @param sourceXml - XML text forwarded to `readRunTextFill`.
 * @returns The marks together with a `supported` flag.
 */
function readRunMarks(node: XmlElementNode, sourceXml: string): MarksParseResult {
  const properties = node.children.find(
    (child): child is XmlElementNode =>
      child.type === "element" && localName(child.name) === "rPr",
  );
  if (!properties) {
    return { marks: [], supported: true };
  }

  for (const child of properties.children) {
    if (child.type === "element" && DISALLOWED_RUN_PROPERTY_NAMES.has(localName(child.name))) {
      return { marks: [], supported: false };
    }
  }

  // Array elements are evaluated left to right, so the readers run in the
  // same order in which their marks appear in the result.
  const candidates: Array<TextMark | false | undefined> = [
    hasOnOffProperty(properties, "b") && { type: "bold" },
    hasOnOffProperty(properties, "i") && { type: "italic" },
    hasUnderlineProperty(properties) && { type: "underline" },
    hasOnOffProperty(properties, "strike") && { type: "strikethrough" },
    hasOnOffProperty(properties, "dstrike") && { type: "doubleStrikethrough" },
    hasOnOffProperty(properties, "vanish") && { type: "vanish" },
    readRunLang(properties),
    readRunBackgroundColor(properties),
    readNumericRunMark(properties, "spacing", "charSpacing"),
    readNumericRunMark(properties, "kern", "kerning"),
    readNumericRunMark(properties, "position", "position"),
    hasOnOffProperty(properties, "emboss") && { type: "emboss" },
    hasOnOffProperty(properties, "imprint") && { type: "imprint" },
    hasOnOffProperty(properties, "shadow") && { type: "shadow" },
    readRunTextFill(properties, sourceXml),
  ];

  const marks: TextMark[] = [];
  for (const candidate of candidates) {
    if (candidate) {
      marks.push(candidate);
    }
  }

  return { marks, supported: true };
}
|
|
1462
|
+
|
|
1463
|
+
/**
 * Builds a `lang` mark from the first `lang` child of the given properties element.
 *
 * The value is taken from the first defined attribute among `w:val`, `val`,
 * `w:bidi` and `bidi`, in that order.
 *
 * @returns The mark, or `undefined` when there is no `lang` child or the
 *   selected attribute value is missing or empty.
 */
function readRunLang(properties: XmlElementNode): TextMark | undefined {
  const langElement = properties.children.find(
    (candidate): candidate is XmlElementNode =>
      candidate.type === "element" && localName(candidate.name) === "lang",
  );
  if (!langElement) {
    return undefined;
  }

  const { attributes } = langElement;
  const val = attributes["w:val"] ?? attributes.val ?? attributes["w:bidi"] ?? attributes.bidi;
  return val ? { type: "lang", val } : undefined;
}
|
|
1476
|
+
|
|
1477
|
+
/**
 * Builds a `backgroundColor` mark from the `fill` attribute of the first `shd` child.
 *
 * @returns The mark, or `undefined` when there is no `shd` child, the fill
 *   attribute (`w:fill` / `fill`) is missing or empty, or its value is `"auto"`.
 */
function readRunBackgroundColor(properties: XmlElementNode): TextMark | undefined {
  for (const child of properties.children) {
    if (child.type === "element" && localName(child.name) === "shd") {
      // Only the first `shd` element is consulted.
      const fill = child.attributes["w:fill"] ?? child.attributes.fill;
      return fill && fill !== "auto" ? { type: "backgroundColor", color: fill } : undefined;
    }
  }
  return undefined;
}
|
|
1492
|
+
|
|
1493
|
+
/**
 * Reads an integer-valued run property and wraps it in a mark of the given type.
 *
 * @param properties - Element whose direct children are searched.
 * @param elementName - Local name of the property element to look for.
 * @param markType - `type` to assign to the resulting mark.
 * @returns `{ type: markType, val }`, or `undefined` when the element is
 *   absent, has no `w:val` / `val` attribute, or the attribute does not parse
 *   to a finite base-10 integer.
 */
function readNumericRunMark(
  properties: XmlElementNode,
  elementName: "spacing" | "kern" | "position",
  markType: "charSpacing" | "kerning" | "position",
): TextMark | undefined {
  const element = properties.children.find(
    (candidate): candidate is XmlElementNode =>
      candidate.type === "element" && localName(candidate.name) === elementName,
  );
  // A missing element and a missing attribute both end up as `undefined` here.
  const rawValue = element?.attributes["w:val"] ?? element?.attributes.val;
  if (rawValue === undefined) {
    return undefined;
  }

  const parsed = Number.parseInt(rawValue, 10);
  return Number.isFinite(parsed) ? { type: markType, val: parsed } : undefined;
}
|
|
1514
|
+
|
|
1515
|
+
/**
 * Captures the raw markup of the first `textFill` child as a `textFill` mark.
 *
 * @param properties - Element whose direct children are searched.
 * @param sourceXml - XML text that the element's `start` / `end` offsets index into.
 * @returns A mark whose `xml` is the slice of `sourceXml` covering the
 *   element, or `undefined` when there is no `textFill` child.
 */
function readRunTextFill(properties: XmlElementNode, sourceXml: string): TextMark | undefined {
  const fillElement = properties.children.find(
    (candidate): candidate is XmlElementNode =>
      candidate.type === "element" && localName(candidate.name) === "textFill",
  );
  return fillElement === undefined
    ? undefined
    : { type: "textFill", xml: sourceXml.slice(fillElement.start, fillElement.end) };
}
|
|
1527
|
+
|
|
1528
|
+
/**
 * Converts a symbol element into a `symbol` node.
 *
 * @param node - Element carrying the `w:char` / `char` and optional
 *   `w:font` / `font` attributes.
 * @param marks - Marks to attach; the `marks` key is omitted when the array is empty.
 * @returns The parsed node, or `undefined` when the char attribute is missing or empty.
 */
function parseSymbolNode(
  node: XmlElementNode,
  marks: TextMark[],
): ParsedSymbolNode | undefined {
  const { attributes } = node;
  const char = attributes["w:char"] ?? attributes.char;
  if (!char) {
    return undefined;
  }

  const font = attributes["w:font"] ?? attributes.font;
  // Optional keys are left out entirely rather than being set to undefined.
  const fontPart = font ? { font } : {};
  const marksPart = marks.length > 0 ? { marks } : {};
  return { type: "symbol", char, ...fontPart, ...marksPart };
}
|
|
1544
|
+
|
|
1545
|
+
/**
 * Reports whether the given properties element is free of unsupported children.
 *
 * The result is `false` when a direct child is named `pPrChange`, or when a
 * direct `rPr` child contains an element listed in
 * `DISALLOWED_PARAGRAPH_PROPERTY_NAMES`; otherwise it is `true`.
 */
function supportsParagraphProperties(node: XmlElementNode): boolean {
  const hasBlockingChild = node.children.some((child) => {
    if (child.type !== "element") {
      return false;
    }

    const childName = localName(child.name);
    if (childName === "pPrChange") {
      return true;
    }

    return (
      childName === "rPr" &&
      child.children.some(
        (entry) =>
          entry.type === "element" &&
          DISALLOWED_PARAGRAPH_PROPERTY_NAMES.has(localName(entry.name)),
      )
    );
  });

  return !hasBlockingChild;
}
|
|
1571
|
+
|
|
1572
|
+
// Local names of `rPr` children that make readRunMarks report `supported: false`.
const DISALLOWED_RUN_PROPERTY_NAMES = new Set(["rPrChange"]);
|
|
1573
|
+
// Local names that, when found inside an `rPr` child, make supportsParagraphProperties return false.
const DISALLOWED_PARAGRAPH_PROPERTY_NAMES = new Set(["pPrChange", "rPrChange"]);
|
|
1574
|
+
|
|
1575
|
+
/**
 * Tests whether an on/off property is present and enabled.
 *
 * Only the first child whose local name equals `propertyName` is inspected.
 * Its `w:val` / `val` attribute defaults to `"true"` when absent, and the
 * property counts as off only for `"false"`, `"0"` or `"off"`
 * (compared case-insensitively).
 *
 * @returns `false` when the property element is absent or switched off.
 */
function hasOnOffProperty(properties: XmlElementNode, propertyName: string): boolean {
  for (const child of properties.children) {
    if (child.type !== "element" || localName(child.name) !== propertyName) {
      continue;
    }

    const rawValue = child.attributes["w:val"] ?? child.attributes.val ?? "true";
    return !["false", "0", "off"].includes(rawValue.toLowerCase());
  }

  return false;
}
|
|
1587
|
+
|
|
1588
|
+
/**
 * Tests whether the properties element carries an active underline.
 *
 * Only the first `u` child is inspected. Its `w:val` / `val` attribute
 * defaults to `"single"` when absent; any value other than `"none"`
 * (case-insensitive) counts as underlined.
 *
 * @returns `false` when there is no `u` child or its value is `"none"`.
 */
function hasUnderlineProperty(properties: XmlElementNode): boolean {
  for (const child of properties.children) {
    if (child.type !== "element" || localName(child.name) !== "u") {
      continue;
    }

    const rawValue = child.attributes["w:val"] ?? child.attributes.val ?? "single";
    return rawValue.toLowerCase() !== "none";
  }

  return false;
}
|
|
1600
|
+
|
|
1601
|
+
/**
 * Reports whether a break element is a plain line break.
 *
 * The `w:type` / `type` attribute defaults to `"textWrapping"` when absent and
 * is compared case-insensitively against `"textwrapping"` and `"line"`.
 */
function isSimpleLineBreak(node: XmlElementNode): boolean {
  const { attributes } = node;
  const breakType = attributes["w:type"] ?? attributes.type ?? "textWrapping";
  return ["textwrapping", "line"].includes(breakType.toLowerCase());
}
|
|
1605
|
+
|
|
1606
|
+
/**
 * Reports whether a break element's `w:type` / `type` attribute is `"column"`
 * (case-insensitive). A missing attribute is treated as the empty string.
 */
function isColumnBreak(node: XmlElementNode): boolean {
  const { attributes } = node;
  const breakType = attributes["w:type"] ?? attributes.type ?? "";
  return breakType.toLowerCase() === "column";
}
|
|
1610
|
+
|
|
1611
|
+
/**
 * Returns the first direct child element with the given local name.
 *
 * @param node - Parent element to search.
 * @param childLocalName - Name to match after any namespace prefix is stripped.
 * @throws Error when no such child exists.
 */
function findChildElement(node: XmlElementNode, childLocalName: string): XmlElementNode {
  for (const entry of node.children) {
    if (entry.type === "element" && localName(entry.name) === childLocalName) {
      return entry;
    }
  }

  throw new Error(`Expected <${childLocalName}> element in main document XML.`);
}
|
|
1623
|
+
|
|
1624
|
+
/**
 * Strips everything up to and including the first `:` from a qualified name.
 *
 * @param name - Possibly prefixed name such as `w:rPr`.
 * @returns The part after the first colon, or `name` unchanged when it has no colon.
 */
function localName(name: string): string {
  // indexOf yields -1 when there is no colon, so slicing from 0 keeps the full name.
  return name.slice(name.indexOf(":") + 1);
}
|
|
1628
|
+
|
|
1629
|
+
/**
 * Looks up an attribute by name, trying the `w:` prefix, then the `r:` prefix,
 * then the bare name.
 *
 * @returns The first value that is neither `undefined` nor `null`, or the
 *   result of the bare-name lookup when neither prefixed form is present.
 */
function readOptionalAttribute(node: XmlElementNode, name: string): string | undefined {
  const { attributes } = node;

  const wordValue = attributes[`w:${name}`];
  if (wordValue != null) {
    return wordValue;
  }

  const relationshipValue = attributes[`r:${name}`];
  if (relationshipValue != null) {
    return relationshipValue;
  }

  return attributes[name];
}
|
|
1634
|
+
|
|
1635
|
+
/**
 * Parses XML text into a tree of element and text nodes under a synthetic
 * `__root__` element.
 *
 * - Comments (`<!-- -->`) and processing instructions (`<? ?>`) are skipped;
 *   an unterminated one consumes the rest of the input.
 * - CDATA sections become text nodes without entity decoding.
 * - Other character data is entity-decoded with `decodeXmlEntities`.
 * - Closing tags are matched against the open element by local name only.
 * - Every node records `start` / `end` offsets into `xml`.
 *
 * @param xml - The XML text to parse.
 * @returns The synthetic root element spanning the whole input.
 * @throws Error on a closing tag without `>`, a closing tag that does not
 *   match the open element, an opening tag without `>` (via `findTagEnd`),
 *   a malformed attribute (via `parseTag`), or elements left open at the end.
 */
function parseXml(xml: string): XmlElementNode {
  const total = xml.length;
  const root: XmlElementNode = {
    type: "element",
    name: "__root__",
    attributes: {},
    children: [],
    start: 0,
    end: total,
  };
  const openElements: XmlElementNode[] = [root];
  // Innermost element that is still open; undefined only if the root was popped.
  const currentParent = (): XmlElementNode | undefined =>
    openElements[openElements.length - 1];
  const selfClosingPattern = /\/\s*$/;

  let position = 0;
  while (position < total) {
    // Comment: skip without emitting a node.
    if (xml.startsWith("<!--", position)) {
      const close = xml.indexOf("-->", position);
      position = close < 0 ? total : close + 3;
      continue;
    }

    // Processing instruction / XML declaration: skip.
    if (xml.startsWith("<?", position)) {
      const close = xml.indexOf("?>", position);
      position = close < 0 ? total : close + 2;
      continue;
    }

    // CDATA: keep the content verbatim as a text node.
    if (xml.startsWith("<![CDATA[", position)) {
      const close = xml.indexOf("]]>", position);
      const contentEnd = close < 0 ? total : close;
      const sectionEnd = close < 0 ? total : close + 3;
      currentParent()?.children.push({
        type: "text",
        text: xml.slice(position + 9, contentEnd),
        start: position,
        end: sectionEnd,
      });
      position = sectionEnd;
      continue;
    }

    // Character data up to the next tag.
    if (xml[position] !== "<") {
      const nextTag = xml.indexOf("<", position);
      const textEnd = nextTag < 0 ? total : nextTag;
      const text = decodeXmlEntities(xml.slice(position, textEnd));
      if (text.length > 0) {
        currentParent()?.children.push({
          type: "text",
          text,
          start: position,
          end: textEnd,
        });
      }
      position = textEnd;
      continue;
    }

    // Closing tag: pop the open element and fix up its end offset.
    if (xml[position + 1] === "/") {
      const close = xml.indexOf(">", position);
      if (close < 0) {
        throw new Error("Malformed XML: missing closing >.");
      }

      const name = xml.slice(position + 2, close).trim();
      const finished = openElements.pop();
      if (finished === undefined || localName(finished.name) !== localName(name)) {
        throw new Error(`Malformed XML: unexpected closing tag </${name}>.`);
      }
      finished.end = close + 1;
      position = close + 1;
      continue;
    }

    // Opening or self-closing tag.
    const tagEnd = findTagEnd(xml, position);
    const tagBody = xml.slice(position + 1, tagEnd);
    const selfClosing = selfClosingPattern.test(tagBody);
    const { name, attributes } = parseTag(tagBody.replace(selfClosingPattern, "").trim());
    const element: XmlElementNode = {
      type: "element",
      name,
      attributes,
      children: [],
      start: position,
      // Provisional end; overwritten when the matching closing tag is seen.
      end: tagEnd + 1,
    };
    currentParent()?.children.push(element);
    if (!selfClosing) {
      openElements.push(element);
    }
    position = tagEnd + 1;
  }

  if (openElements.length !== 1) {
    throw new Error("Malformed XML: unclosed element in main document XML.");
  }

  return root;
}
|
|
1733
|
+
|
|
1734
|
+
/**
 * Splits the inside of an opening tag into its name and attribute map.
 *
 * The name is the first run of non-whitespace characters. Each attribute is a
 * key followed by `=` and a single- or double-quoted value; a key with no `=`
 * is stored with an empty-string value. Values are entity-decoded with
 * `decodeXmlEntities`. A value whose closing quote is missing runs to the end
 * of `tagBody`.
 *
 * @param tagBody - Tag text without the surrounding `<`, `>` or trailing `/`.
 * @returns The tag name and its attributes keyed by their written (prefixed) names.
 * @throws Error when `=` is not followed by a quote character.
 */
function parseTag(tagBody: string): { name: string; attributes: Record<string, string> } {
  const length = tagBody.length;
  let position = 0;

  const isWhitespaceAt = (index: number): boolean => /\s/.test(tagBody[index] ?? "");
  const skipWhitespace = (): void => {
    while (position < length && isWhitespaceAt(position)) {
      position += 1;
    }
  };

  skipWhitespace();
  const nameStart = position;
  while (position < length && !isWhitespaceAt(position)) {
    position += 1;
  }
  const name = tagBody.slice(nameStart, position);

  const attributes: Record<string, string> = {};
  for (;;) {
    skipWhitespace();
    if (position >= length) {
      break;
    }

    const keyStart = position;
    while (position < length && !/[\s=]/.test(tagBody[position] ?? "")) {
      position += 1;
    }
    const key = tagBody.slice(keyStart, position);

    skipWhitespace();
    if (tagBody[position] !== "=") {
      // Bare attribute without a value.
      attributes[key] = "";
      continue;
    }
    position += 1;
    skipWhitespace();

    const quote = tagBody[position];
    if (quote !== `"` && quote !== `'`) {
      throw new Error(`Malformed XML attribute ${key}.`);
    }
    position += 1;

    // An unterminated value extends to the end of the tag body.
    const closingQuote = tagBody.indexOf(quote, position);
    const valueEnd = closingQuote >= 0 ? closingQuote : length;
    attributes[key] = decodeXmlEntities(tagBody.slice(position, valueEnd));
    position = valueEnd + 1;
  }

  return { name, attributes };
}
|
|
1792
|
+
|
|
1793
|
+
/**
 * Finds the `>` that ends the tag starting at `start`, ignoring any `>` that
 * appears inside a single- or double-quoted attribute value.
 *
 * @param xml - Full XML text.
 * @param start - Index of the tag's opening `<`; scanning begins at `start + 1`.
 * @returns Index of the terminating `>`.
 * @throws Error when the input ends before an unquoted `>` is found.
 */
function findTagEnd(xml: string, start: number): number {
  let openQuote: string | null = null;

  for (let index = start + 1; index < xml.length; index += 1) {
    const ch = xml[index];

    if (openQuote !== null) {
      // Inside a quoted value: only the matching quote is significant.
      if (ch === openQuote) {
        openQuote = null;
      }
      continue;
    }

    if (ch === `"` || ch === `'`) {
      openQuote = ch;
      continue;
    }

    if (ch === ">") {
      return index;
    }
  }

  throw new Error("Malformed XML: missing >.");
}
|
|
1822
|
+
|
|
1823
|
+
/**
 * Replaces the five predefined XML entities and numeric character references
 * in `value` with the characters they denote.
 *
 * Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, decimal `&#N;` and
 * hexadecimal `&#xN;`. Anything else, including unknown named entities, is
 * left untouched.
 *
 * Numeric references above U+10FFFF are also left as literal text:
 * `String.fromCodePoint` throws a `RangeError` for such values, and a single
 * bad reference should not abort the whole parse.
 *
 * @param value - Raw text or attribute value as written in the XML.
 * @returns The decoded string.
 */
function decodeXmlEntities(value: string): string {
  const MAX_CODE_POINT = 0x10ffff;

  return value.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (match: string, entity: string): string => {
      switch (entity) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return `"`;
        case "apos":
          return "'";
        default: {
          // The pattern guarantees the remaining alternatives are numeric references.
          const codePoint = entity.startsWith("#x")
            ? Number.parseInt(entity.slice(2), 16)
            : Number.parseInt(entity.slice(1), 10);
          if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > MAX_CODE_POINT) {
            return match;
          }
          return String.fromCodePoint(codePoint);
        }
      }
    },
  );
}
|