@beyondwork/docx-react-component 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/package.json +24 -34
- package/src/api/README.md +5 -1
- package/src/api/public-types.ts +710 -4
- package/src/api/session-state.ts +60 -0
- package/src/core/commands/formatting-commands.ts +2 -1
- package/src/core/commands/image-commands.ts +147 -0
- package/src/core/commands/index.ts +19 -3
- package/src/core/commands/list-commands.ts +231 -36
- package/src/core/commands/paragraph-layout-commands.ts +339 -0
- package/src/core/commands/section-layout-commands.ts +680 -0
- package/src/core/commands/style-commands.ts +262 -0
- package/src/core/search/search-text.ts +357 -0
- package/src/core/selection/mapping.ts +41 -0
- package/src/core/state/editor-state.ts +4 -1
- package/src/index.ts +51 -0
- package/src/io/docx-session.ts +623 -56
- package/src/io/export/serialize-comments.ts +104 -34
- package/src/io/export/serialize-footnotes.ts +198 -1
- package/src/io/export/serialize-headers-footers.ts +203 -10
- package/src/io/export/serialize-main-document.ts +285 -8
- package/src/io/export/serialize-numbering.ts +28 -7
- package/src/io/export/split-review-boundaries.ts +181 -19
- package/src/io/normalize/normalize-text.ts +144 -32
- package/src/io/ooxml/highlight-colors.ts +39 -0
- package/src/io/ooxml/numbering-sentinels.ts +44 -0
- package/src/io/ooxml/parse-comments.ts +85 -19
- package/src/io/ooxml/parse-fields.ts +396 -0
- package/src/io/ooxml/parse-footnotes.ts +452 -22
- package/src/io/ooxml/parse-headers-footers.ts +657 -29
- package/src/io/ooxml/parse-inline-media.ts +30 -0
- package/src/io/ooxml/parse-main-document.ts +807 -20
- package/src/io/ooxml/parse-numbering.ts +7 -0
- package/src/io/ooxml/parse-revisions.ts +317 -38
- package/src/io/ooxml/parse-settings.ts +184 -0
- package/src/io/ooxml/parse-shapes.ts +25 -0
- package/src/io/ooxml/parse-styles.ts +463 -0
- package/src/io/ooxml/parse-theme.ts +32 -0
- package/src/legal/bookmarks.ts +44 -0
- package/src/legal/cross-references.ts +59 -1
- package/src/model/canonical-document.ts +250 -4
- package/src/model/cds-1.0.0.ts +13 -0
- package/src/model/snapshot.ts +87 -2
- package/src/review/store/revision-store.ts +6 -0
- package/src/review/store/revision-types.ts +1 -0
- package/src/runtime/document-layout.ts +332 -0
- package/src/runtime/document-navigation.ts +603 -0
- package/src/runtime/document-runtime.ts +1754 -78
- package/src/runtime/document-search.ts +145 -0
- package/src/runtime/numbering-prefix.ts +47 -26
- package/src/runtime/page-layout-estimation.ts +212 -0
- package/src/runtime/read-only-diagnostics-runtime.ts +9 -0
- package/src/runtime/session-capabilities.ts +35 -3
- package/src/runtime/story-context.ts +164 -0
- package/src/runtime/story-targeting.ts +162 -0
- package/src/runtime/surface-projection.ts +324 -36
- package/src/runtime/table-schema.ts +89 -7
- package/src/runtime/view-state.ts +477 -0
- package/src/runtime/workflow-markup.ts +349 -0
- package/src/ui/WordReviewEditor.tsx +2469 -1344
- package/src/ui/browser-export.ts +52 -0
- package/src/ui/editor-command-bag.ts +120 -0
- package/src/ui/editor-runtime-boundary.ts +1422 -0
- package/src/ui/editor-shell-view.tsx +134 -0
- package/src/ui/editor-surface-controller.tsx +51 -0
- package/src/ui/headless/preserve-editor-selection.ts +5 -0
- package/src/ui/headless/revision-decoration-model.ts +4 -4
- package/src/ui/headless/selection-helpers.ts +20 -0
- package/src/ui/headless/selection-toolbar-model.ts +22 -0
- package/src/ui/headless/use-editor-keyboard.ts +6 -1
- package/src/ui/runtime-snapshot-selectors.ts +197 -0
- package/src/ui-tailwind/chrome/tw-alert-banner.tsx +18 -2
- package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +129 -0
- package/src/ui-tailwind/chrome/tw-layout-panel.tsx +114 -0
- package/src/ui-tailwind/chrome/tw-object-context-toolbar.tsx +34 -0
- package/src/ui-tailwind/chrome/tw-page-ruler.tsx +386 -0
- package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +150 -14
- package/src/ui-tailwind/chrome/tw-table-context-toolbar.tsx +128 -0
- package/src/ui-tailwind/editor-surface/perf-probe.ts +179 -0
- package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +46 -7
- package/src/ui-tailwind/editor-surface/pm-contextual-ui.ts +31 -0
- package/src/ui-tailwind/editor-surface/pm-decorations.ts +35 -0
- package/src/ui-tailwind/editor-surface/pm-position-map.ts +3 -3
- package/src/ui-tailwind/editor-surface/pm-schema.ts +186 -13
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +191 -68
- package/src/ui-tailwind/editor-surface/search-plugin.ts +19 -68
- package/src/ui-tailwind/editor-surface/surface-build-keys.ts +51 -0
- package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +11 -0
- package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +7 -1
- package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +528 -85
- package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +0 -1
- package/src/ui-tailwind/index.ts +2 -1
- package/src/ui-tailwind/page-chrome-model.ts +27 -0
- package/src/ui-tailwind/review/tw-comment-sidebar.tsx +277 -147
- package/src/ui-tailwind/review/tw-health-panel.tsx +31 -2
- package/src/ui-tailwind/review/tw-review-rail.tsx +8 -8
- package/src/ui-tailwind/review/tw-revision-sidebar.tsx +15 -15
- package/src/ui-tailwind/theme/editor-theme.css +127 -0
- package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +4 -0
- package/src/ui-tailwind/toolbar/tw-toolbar.tsx +829 -12
- package/src/ui-tailwind/tw-review-workspace.tsx +1238 -42
- package/src/validation/compatibility-engine.ts +119 -24
- package/src/validation/compatibility-report.ts +1 -0
- package/src/validation/diagnostics.ts +1 -0
- package/src/validation/docx-comment-proof.ts +707 -0
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
import type { NumberingCatalog, ParagraphNode } from "../../model/canonical-document.ts";
|
|
2
|
+
import {
|
|
3
|
+
isSyntheticDocxNullAbstractDefinition,
|
|
4
|
+
isSyntheticDocxNullNumberingInstance,
|
|
5
|
+
} from "../ooxml/numbering-sentinels.ts";
|
|
2
6
|
|
|
3
7
|
export const WORD_NUMBERING_CONTENT_TYPE =
|
|
4
8
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml";
|
|
5
9
|
|
|
6
10
|
export function serializeNumberingXml(catalog: NumberingCatalog): string {
|
|
7
|
-
const abstractDefinitions = Object.values(catalog.abstractDefinitions)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
)
|
|
11
|
+
const abstractDefinitions = Object.values(catalog.abstractDefinitions)
|
|
12
|
+
.filter((definition) => !isSyntheticDocxNullAbstractDefinition(definition))
|
|
13
|
+
.sort((left, right) =>
|
|
14
|
+
compareSerializedIds(left.abstractNumberingId, right.abstractNumberingId),
|
|
15
|
+
);
|
|
16
|
+
const instances = Object.values(catalog.instances)
|
|
17
|
+
.filter((instance) => !isSyntheticDocxNullNumberingInstance(instance))
|
|
18
|
+
.sort((left, right) =>
|
|
19
|
+
compareSerializedIds(left.numberingInstanceId, right.numberingInstanceId),
|
|
20
|
+
);
|
|
13
21
|
|
|
14
22
|
const body = [
|
|
15
23
|
...abstractDefinitions.map((definition) => serializeAbstractDefinition(definition)),
|
|
@@ -22,6 +30,17 @@ export function serializeNumberingXml(catalog: NumberingCatalog): string {
|
|
|
22
30
|
].join("\n");
|
|
23
31
|
}
|
|
24
32
|
|
|
33
|
+
/**
 * Reports whether the catalog contains anything other than the synthetic
 * DOCX "null" numbering placeholders.
 *
 * @param catalog Numbering catalog to inspect.
 * @returns `true` as soon as one abstract definition or one instance is found
 *   that the synthetic-sentinel predicates do not recognise; `false` otherwise
 *   (including for an empty catalog).
 */
export function hasSerializableNumberingEntries(catalog: NumberingCatalog): boolean {
  // Abstract definitions are checked first, matching the short-circuit order
  // callers would observe from an `a || b` expression.
  for (const definition of Object.values(catalog.abstractDefinitions)) {
    if (!isSyntheticDocxNullAbstractDefinition(definition)) {
      return true;
    }
  }

  for (const instance of Object.values(catalog.instances)) {
    if (!isSyntheticDocxNullNumberingInstance(instance)) {
      return true;
    }
  }

  return false;
}
|
|
43
|
+
|
|
25
44
|
export function serializeParagraphNumberingProperties(
|
|
26
45
|
numbering: ParagraphNode["numbering"],
|
|
27
46
|
): string {
|
|
@@ -51,10 +70,12 @@ function serializeLevel(level: NumberingCatalog["abstractDefinitions"][string]["
|
|
|
51
70
|
const paragraphStyle = level.paragraphStyleId
|
|
52
71
|
? `<w:pStyle w:val="${escapeAttribute(level.paragraphStyleId)}"/>`
|
|
53
72
|
: "";
|
|
73
|
+
const isLegal = level.isLegalNumbering ? "<w:isLgl/>" : "";
|
|
74
|
+
const suffix = level.suffix ? `<w:suff w:val="${escapeAttribute(level.suffix)}"/>` : "";
|
|
54
75
|
|
|
55
76
|
return `<w:lvl w:ilvl="${level.level}">${start}<w:numFmt w:val="${escapeAttribute(
|
|
56
77
|
level.format,
|
|
57
|
-
)}"/><w:lvlText w:val="${escapeAttribute(level.text)}"/>${paragraphStyle}</w:lvl>`;
|
|
78
|
+
)}"/><w:lvlText w:val="${escapeAttribute(level.text)}"/>${paragraphStyle}${isLegal}${suffix}</w:lvl>`;
|
|
58
79
|
}
|
|
59
80
|
|
|
60
81
|
function serializeInstance(instance: NumberingCatalog["instances"][string]): string {
|
|
@@ -18,25 +18,9 @@ export function splitDocumentAtReviewBoundaries(
|
|
|
18
18
|
return content;
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
let cursor = 0;
|
|
22
|
-
const children = content.children.map((block, index) => {
|
|
23
|
-
if (index > 0 && content.children[index - 1]?.type === "paragraph" && block.type === "paragraph") {
|
|
24
|
-
cursor += 1;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (block.type !== "paragraph") {
|
|
28
|
-
cursor += 1;
|
|
29
|
-
return block;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
const next = splitParagraph(block, splitPositions, cursor);
|
|
33
|
-
cursor = next.cursor;
|
|
34
|
-
return next.paragraph;
|
|
35
|
-
});
|
|
36
|
-
|
|
37
21
|
return {
|
|
38
22
|
type: "doc",
|
|
39
|
-
children,
|
|
23
|
+
children: splitBlockNodes(content.children, splitPositions, 0, true).children,
|
|
40
24
|
};
|
|
41
25
|
}
|
|
42
26
|
|
|
@@ -98,7 +82,7 @@ function splitParagraph(
|
|
|
98
82
|
}
|
|
99
83
|
|
|
100
84
|
children.push(child);
|
|
101
|
-
nextCursor +=
|
|
85
|
+
nextCursor += measureInlineNodeForReviewBoundaries(child);
|
|
102
86
|
}
|
|
103
87
|
|
|
104
88
|
return {
|
|
@@ -110,6 +94,147 @@ function splitParagraph(
|
|
|
110
94
|
};
|
|
111
95
|
}
|
|
112
96
|
|
|
97
|
+
/**
 * Walks a list of block nodes in order, splitting paragraphs (including those
 * nested inside tables and SDTs) while threading a running cursor through
 * every block.
 *
 * @param blocks Block nodes to process.
 * @param splitPositions Positions forwarded to the paragraph and table splitters.
 * @param cursor Cursor value at the start of `blocks`.
 * @param useSurfaceParagraphSeparators When `true`, every paragraph that is not
 *   the first entry of `blocks` advances the cursor by one before being split.
 * @returns The rebuilt block list and the cursor value after the last block.
 */
function splitBlockNodes(
  blocks: readonly DocumentRootNode["children"][number][],
  splitPositions: ReadonlySet<number>,
  cursor: number,
  useSurfaceParagraphSeparators: boolean,
): {
  children: DocumentRootNode["children"];
  cursor: number;
} {
  const rebuilt: DocumentRootNode["children"] = [];
  let position = cursor;

  blocks.forEach((block, blockIndex) => {
    switch (block.type) {
      case "paragraph": {
        // The separator is charged before the paragraph itself is measured.
        if (blockIndex > 0 && useSurfaceParagraphSeparators) {
          position += 1;
        }
        const split = splitParagraph(block, splitPositions, position);
        rebuilt.push(split.paragraph);
        position = split.cursor;
        return;
      }
      case "table": {
        const split = splitTableAtReviewBoundaries(block, splitPositions, position);
        rebuilt.push(split.table);
        position = split.cursor;
        return;
      }
      case "sdt": {
        // Nested SDT content never uses surface paragraph separators.
        const nested = splitBlockNodes(block.children, splitPositions, position, false);
        rebuilt.push({
          ...block,
          children: nested.children,
        });
        position = nested.cursor;
        return;
      }
      default:
        // `custom_xml` and every remaining block kind are copied through
        // untouched and occupy exactly one cursor position.
        rebuilt.push(block);
        position += 1;
    }
  });

  return {
    children: rebuilt,
    cursor: position,
  };
}
|
|
151
|
+
|
|
152
|
+
/**
 * Rebuilds a table with the contents of every cell split at the given
 * positions. Cells are visited row by row, left to right, and the cursor is
 * carried from each cell into the next.
 *
 * @param table Table block to process.
 * @param splitPositions Positions forwarded to `splitBlockNodes` for each cell.
 * @param cursor Cursor value at the start of the table.
 * @returns A copy of the table with rebuilt rows/cells and the cursor value
 *   after the last cell.
 */
function splitTableAtReviewBoundaries(
  table: Extract<DocumentRootNode["children"][number], { type: "table" }>,
  splitPositions: ReadonlySet<number>,
  cursor: number,
): {
  table: Extract<DocumentRootNode["children"][number], { type: "table" }>;
  cursor: number;
} {
  let position = cursor;

  // Splits one cell's content and advances the shared cursor; cell content
  // never uses surface paragraph separators.
  const splitCell = (cell: (typeof table.rows)[number]["cells"][number]) => {
    const { children, cursor: cursorAfterCell } = splitBlockNodes(
      cell.children,
      splitPositions,
      position,
      false,
    );
    position = cursorAfterCell;
    return { ...cell, children };
  };

  const rows = table.rows.map((row) => ({
    ...row,
    cells: row.cells.map((cell) => splitCell(cell)),
  }));

  return {
    table: { ...table, rows },
    cursor: position,
  };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Computes the cursor value after a table without rebuilding it, by measuring
 * the content of every cell row by row, left to right.
 *
 * @param table Table block to measure.
 * @param cursor Cursor value at the start of the table.
 * @returns Cursor value after the table's last cell.
 */
function advanceCursorThroughTable(
  table: Extract<DocumentRootNode["children"][number], { type: "table" }>,
  cursor: number,
): number {
  return table.rows.reduce(
    (cursorAfterRows, row) =>
      row.cells.reduce(
        // Cell content is measured without surface paragraph separators.
        (cursorAfterCells, cell) =>
          measureBlockNodesForReviewBoundaries(cell.children, cursorAfterCells, false),
        cursorAfterRows,
      ),
    cursor,
  );
}
|
|
194
|
+
|
|
195
|
+
/**
 * Computes the cursor value after a list of block nodes without modifying
 * them.
 *
 * @param blocks Block nodes to measure, in order.
 * @param cursor Cursor value at the start of `blocks`.
 * @param useSurfaceParagraphSeparators When `true`, every paragraph that is not
 *   the first entry of `blocks` adds one extra position before its content.
 * @returns Cursor value after the last block.
 */
function measureBlockNodesForReviewBoundaries(
  blocks: readonly DocumentRootNode["children"][number][],
  cursor: number,
  useSurfaceParagraphSeparators: boolean,
): number {
  let position = cursor;

  blocks.forEach((block, blockIndex) => {
    switch (block.type) {
      case "paragraph": {
        if (blockIndex > 0 && useSurfaceParagraphSeparators) {
          position += 1;
        }
        // A paragraph's width is the sum of its inline children's widths.
        for (const child of block.children) {
          position += measureInlineNodeForReviewBoundaries(child);
        }
        return;
      }
      case "table":
        position = advanceCursorThroughTable(block, position);
        return;
      case "sdt":
        // Nested SDT content never uses surface paragraph separators.
        position = measureBlockNodesForReviewBoundaries(block.children, position, false);
        return;
      default:
        // `custom_xml` and every remaining block kind occupy one position.
        position += 1;
    }
  });

  return position;
}
|
|
237
|
+
|
|
113
238
|
function splitHyperlinkNode(
|
|
114
239
|
node: HyperlinkNode,
|
|
115
240
|
splitPositions: ReadonlySet<number>,
|
|
@@ -132,7 +257,7 @@ function splitHyperlinkNode(
|
|
|
132
257
|
}
|
|
133
258
|
|
|
134
259
|
groups[groups.length - 1]?.push(child);
|
|
135
|
-
nextCursor +=
|
|
260
|
+
nextCursor += measureInlineNodeForReviewBoundaries(child);
|
|
136
261
|
if (splitPositions.has(nextCursor)) {
|
|
137
262
|
groups.push([]);
|
|
138
263
|
}
|
|
@@ -192,3 +317,40 @@ function splitTextNode(
|
|
|
192
317
|
cursor: cursor + codepoints.length,
|
|
193
318
|
};
|
|
194
319
|
}
|
|
320
|
+
|
|
321
|
+
/**
 * Returns how many cursor positions an inline node occupies.
 *
 * - `text` counts Unicode code points (not UTF-16 code units).
 * - `bookmark_start` / `bookmark_end` occupy no positions.
 * - `hyperlink` is the sum of its children.
 * - `field` is the sum of its children, or 1 when that sum is zero.
 * - Every other inline node occupies exactly one position.
 *
 * @param node Inline node to measure.
 * @returns The node's width in cursor positions.
 */
function measureInlineNodeForReviewBoundaries(node: InlineNode): number {
  if (node.type === "text") {
    // Spreading a string iterates by code point, so astral characters count once.
    return [...node.text].length;
  }

  if (node.type === "bookmark_start" || node.type === "bookmark_end") {
    return 0;
  }

  if (node.type === "hyperlink" || node.type === "field") {
    let width = 0;
    for (const child of node.children) {
      width += measureInlineNodeForReviewBoundaries(child);
    }
    if (node.type === "field") {
      // A field without measurable content still takes one position.
      return width > 0 ? width : 1;
    }
    return width;
  }

  // tab, breaks, footnote refs, images, shapes, symbols, and anything else.
  return 1;
}
|
|
@@ -4,6 +4,7 @@ import type {
|
|
|
4
4
|
CustomXmlNode,
|
|
5
5
|
DiagnosticStore,
|
|
6
6
|
DocumentRootNode,
|
|
7
|
+
FieldRegistry,
|
|
7
8
|
InlineNode,
|
|
8
9
|
MediaCatalog,
|
|
9
10
|
OpaqueBlockNode,
|
|
@@ -11,6 +12,7 @@ import type {
|
|
|
11
12
|
OpaqueInlineNode,
|
|
12
13
|
ParagraphNode,
|
|
13
14
|
PreservationStore,
|
|
15
|
+
SectionBreakNode,
|
|
14
16
|
TableCellNode,
|
|
15
17
|
TableNode,
|
|
16
18
|
TableRowNode,
|
|
@@ -27,17 +29,22 @@ import type {
|
|
|
27
29
|
ParsedImageNode,
|
|
28
30
|
ParsedMainDocument,
|
|
29
31
|
ParsedParagraphNode,
|
|
32
|
+
ParsedSectionBreakNode,
|
|
30
33
|
ParsedSdtNode,
|
|
31
34
|
ParsedTableBlockNode,
|
|
32
35
|
ParsedTableCellNode,
|
|
33
36
|
ParsedTableRowNode,
|
|
34
37
|
} from "../ooxml/parse-main-document.ts";
|
|
38
|
+
import { classifyFieldInstruction, buildFieldRegistry } from "../ooxml/parse-fields.ts";
|
|
35
39
|
|
|
36
40
|
export interface NormalizedTextDocument {
|
|
37
41
|
content: DocumentRootNode;
|
|
38
42
|
media: MediaCatalog;
|
|
39
43
|
preservation: PreservationStore;
|
|
40
44
|
diagnostics: DiagnosticStore;
|
|
45
|
+
finalSectionProperties?: ParsedMainDocument["finalSectionProperties"];
|
|
46
|
+
/** Package-backed field registry built during normalization. */
|
|
47
|
+
fieldRegistry?: FieldRegistry;
|
|
41
48
|
}
|
|
42
49
|
|
|
43
50
|
interface NormalizationState {
|
|
@@ -53,6 +60,7 @@ interface NormalizationState {
|
|
|
53
60
|
export function normalizeParsedTextDocument(
|
|
54
61
|
document: ParsedMainDocument,
|
|
55
62
|
packagePartName = "/word/document.xml",
|
|
63
|
+
options?: { styles?: import("../../model/canonical-document.ts").StylesCatalog },
|
|
56
64
|
): NormalizedTextDocument {
|
|
57
65
|
const state: NormalizationState = {
|
|
58
66
|
nextFragmentIndex: 1,
|
|
@@ -72,61 +80,82 @@ export function normalizeParsedTextDocument(
|
|
|
72
80
|
},
|
|
73
81
|
};
|
|
74
82
|
|
|
75
|
-
const children =
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
) {
|
|
81
|
-
|
|
83
|
+
const children: BlockNode[] = [];
|
|
84
|
+
let previousParagraph = false;
|
|
85
|
+
|
|
86
|
+
for (const block of document.blocks) {
|
|
87
|
+
const normalizedBlocks = normalizeBlocks(block, state, packagePartName);
|
|
88
|
+
for (const normalizedBlock of normalizedBlocks) {
|
|
89
|
+
if (previousParagraph && normalizedBlock.type === "paragraph") {
|
|
90
|
+
state.cursor += 1;
|
|
91
|
+
}
|
|
92
|
+
children.push(normalizedBlock);
|
|
93
|
+
previousParagraph = normalizedBlock.type === "paragraph";
|
|
82
94
|
}
|
|
95
|
+
}
|
|
83
96
|
|
|
84
|
-
|
|
85
|
-
|
|
97
|
+
const content: DocumentRootNode = { type: "doc", children };
|
|
98
|
+
|
|
99
|
+
// Build the field registry from normalized content.
|
|
100
|
+
// When styles are available, the registry includes full TOC heading resolution.
|
|
101
|
+
// Without styles, it still catalogs all field instances for the supported/preserve-only partition.
|
|
102
|
+
const styles = options?.styles ?? { paragraphs: {}, characters: {}, tables: {} };
|
|
103
|
+
const fieldRegistry = buildFieldRegistry({ content, styles });
|
|
104
|
+
const hasFields = fieldRegistry.supported.length > 0 || fieldRegistry.preserveOnly.length > 0;
|
|
86
105
|
|
|
87
106
|
return {
|
|
88
|
-
content
|
|
89
|
-
type: "doc",
|
|
90
|
-
children,
|
|
91
|
-
},
|
|
107
|
+
content,
|
|
92
108
|
media: state.media,
|
|
93
109
|
preservation: state.preservation,
|
|
94
110
|
diagnostics: state.diagnostics,
|
|
111
|
+
...(document.finalSectionProperties !== undefined
|
|
112
|
+
? { finalSectionProperties: document.finalSectionProperties }
|
|
113
|
+
: {}),
|
|
114
|
+
...(hasFields ? { fieldRegistry } : {}),
|
|
95
115
|
};
|
|
96
116
|
}
|
|
97
117
|
|
|
98
|
-
function
|
|
118
|
+
function normalizeBlocks(
|
|
99
119
|
block: ParsedBlockNode,
|
|
100
120
|
state: NormalizationState,
|
|
101
121
|
packagePartName: string,
|
|
102
|
-
): BlockNode {
|
|
122
|
+
): BlockNode[] {
|
|
103
123
|
if (block.type === "opaque_block") {
|
|
104
124
|
const opaque = recordOpaqueFragment("opaque_block", block.rawXml, state, packagePartName);
|
|
105
125
|
state.cursor += 1;
|
|
106
|
-
return
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
126
|
+
return [
|
|
127
|
+
{
|
|
128
|
+
type: "opaque_block",
|
|
129
|
+
fragmentId: opaque.fragmentId,
|
|
130
|
+
warningId: opaque.warningId,
|
|
131
|
+
},
|
|
132
|
+
];
|
|
111
133
|
}
|
|
112
134
|
|
|
113
135
|
if (block.type === "table") {
|
|
114
|
-
return normalizeTable(block, state, packagePartName);
|
|
136
|
+
return [normalizeTable(block, state, packagePartName)];
|
|
115
137
|
}
|
|
116
138
|
|
|
117
139
|
if (block.type === "sdt") {
|
|
118
|
-
return normalizeSdt(block, state, packagePartName);
|
|
140
|
+
return [normalizeSdt(block, state, packagePartName)];
|
|
119
141
|
}
|
|
120
142
|
|
|
121
143
|
if (block.type === "custom_xml") {
|
|
122
|
-
return normalizeCustomXml(block, state, packagePartName);
|
|
144
|
+
return [normalizeCustomXml(block, state, packagePartName)];
|
|
123
145
|
}
|
|
124
146
|
|
|
125
147
|
if (block.type === "alt_chunk") {
|
|
126
|
-
return normalizeAltChunk(block, state);
|
|
148
|
+
return [normalizeAltChunk(block, state)];
|
|
127
149
|
}
|
|
128
150
|
|
|
129
|
-
|
|
151
|
+
if (block.type === "section_break") {
|
|
152
|
+
return [normalizeSectionBreak(block)];
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const normalizedParagraph = normalizeParagraph(block, state, packagePartName);
|
|
156
|
+
return block.sectionProperties
|
|
157
|
+
? [normalizedParagraph, normalizeInlineSectionBreak(block)]
|
|
158
|
+
: [normalizedParagraph];
|
|
130
159
|
}
|
|
131
160
|
|
|
132
161
|
function normalizeParagraph(
|
|
@@ -141,6 +170,9 @@ function normalizeParagraph(
|
|
|
141
170
|
...(paragraph.numbering ? { numbering: paragraph.numbering } : {}),
|
|
142
171
|
...(paragraph.alignment ? { alignment: paragraph.alignment } : {}),
|
|
143
172
|
...(paragraph.spacing ? { spacing: paragraph.spacing } : {}),
|
|
173
|
+
...(paragraph.contextualSpacing !== undefined
|
|
174
|
+
? { contextualSpacing: paragraph.contextualSpacing }
|
|
175
|
+
: {}),
|
|
144
176
|
...(paragraph.indentation ? { indentation: paragraph.indentation } : {}),
|
|
145
177
|
...(paragraph.tabStops && paragraph.tabStops.length > 0 ? { tabStops: paragraph.tabStops } : {}),
|
|
146
178
|
...(paragraph.keepNext ? { keepNext: paragraph.keepNext } : {}),
|
|
@@ -190,7 +222,7 @@ function normalizeTableCell(
|
|
|
190
222
|
): TableCellNode {
|
|
191
223
|
const children: BlockNode[] = [];
|
|
192
224
|
for (const block of cell.children) {
|
|
193
|
-
children.push(
|
|
225
|
+
children.push(...normalizeBlocks(block, state, packagePartName));
|
|
194
226
|
}
|
|
195
227
|
// Ensure at least one child (OOXML requires at least one <w:p> per cell)
|
|
196
228
|
if (children.length === 0) {
|
|
@@ -213,7 +245,7 @@ function normalizeSdt(
|
|
|
213
245
|
return {
|
|
214
246
|
type: "sdt",
|
|
215
247
|
properties: { ...block.properties },
|
|
216
|
-
children: block.children.
|
|
248
|
+
children: block.children.flatMap((child) => normalizeBlocks(child, state, packagePartName)),
|
|
217
249
|
};
|
|
218
250
|
}
|
|
219
251
|
|
|
@@ -226,7 +258,27 @@ function normalizeCustomXml(
|
|
|
226
258
|
type: "custom_xml",
|
|
227
259
|
...(block.uri ? { uri: block.uri } : {}),
|
|
228
260
|
...(block.element ? { element: block.element } : {}),
|
|
229
|
-
children: block.children.
|
|
261
|
+
children: block.children.flatMap((child) => normalizeBlocks(child, state, packagePartName)),
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
/**
 * Converts a parsed standalone section break into a `section_break` node,
 * copying `sectionPropertiesXml` and `sectionProperties` across as-is.
 *
 * @param block Parsed section-break block.
 * @returns The corresponding `section_break` node.
 */
function normalizeSectionBreak(block: ParsedSectionBreakNode): SectionBreakNode {
  const { sectionPropertiesXml, sectionProperties } = block;
  return { type: "section_break", sectionPropertiesXml, sectionProperties };
}
|
|
272
|
+
|
|
273
|
+
/**
 * Builds a `section_break` node from the section properties attached to a
 * parsed paragraph.
 *
 * `sectionPropertiesXml` is only included when it is truthy.
 * `paragraph.sectionProperties` is used without a runtime check — the caller
 * is expected to invoke this only for paragraphs that carry section properties.
 *
 * @param paragraph Parsed paragraph carrying section properties.
 * @returns The `section_break` node to emit alongside the paragraph.
 */
function normalizeInlineSectionBreak(
  paragraph: ParsedParagraphNode,
): SectionBreakNode {
  const sectionProperties = paragraph.sectionProperties!;
  const sectionPropertiesXml = paragraph.sectionPropertiesXml;

  if (sectionPropertiesXml) {
    return { type: "section_break", sectionPropertiesXml, sectionProperties };
  }

  return { type: "section_break", sectionProperties };
}
|
|
232
284
|
|
|
@@ -324,6 +376,7 @@ function normalizeInlineChildren(
|
|
|
324
376
|
type: "shape",
|
|
325
377
|
...(node.text ? { text: node.text } : {}),
|
|
326
378
|
...(node.geometry ? { geometry: node.geometry } : {}),
|
|
379
|
+
...(node.isTextBox ? { isTextBox: true } : {}),
|
|
327
380
|
rawXml: node.rawXml,
|
|
328
381
|
});
|
|
329
382
|
state.cursor += 1;
|
|
@@ -359,15 +412,33 @@ function normalizeInlineChildren(
|
|
|
359
412
|
bookmarkId: node.bookmarkId,
|
|
360
413
|
});
|
|
361
414
|
break;
|
|
362
|
-
case "
|
|
415
|
+
case "footnote_ref":
|
|
416
|
+
normalized.push({
|
|
417
|
+
type: "footnote_ref",
|
|
418
|
+
noteId: node.noteId,
|
|
419
|
+
noteKind: node.noteKind,
|
|
420
|
+
});
|
|
421
|
+
state.cursor += 1;
|
|
422
|
+
break;
|
|
423
|
+
case "field": {
|
|
424
|
+
const classification = classifyFieldInstruction(node.instruction);
|
|
425
|
+
const cursorBeforeField = state.cursor;
|
|
426
|
+
const fieldChildren = node.children
|
|
427
|
+
? normalizeInlineChildren(node.children, state, packagePartName)
|
|
428
|
+
: normalizeFieldContentXml(node.contentXml ?? "");
|
|
429
|
+
state.cursor = cursorBeforeField;
|
|
363
430
|
normalized.push({
|
|
364
431
|
type: "field",
|
|
365
432
|
fieldType: node.fieldType,
|
|
366
433
|
instruction: node.instruction,
|
|
367
|
-
children:
|
|
434
|
+
children: fieldChildren,
|
|
435
|
+
fieldFamily: classification.family,
|
|
436
|
+
...(classification.target ? { fieldTarget: classification.target } : {}),
|
|
437
|
+
refreshStatus: classification.supported ? "stale" : "preserve-only",
|
|
368
438
|
});
|
|
369
|
-
state.cursor += 1;
|
|
439
|
+
state.cursor += fieldChildren.length > 0 ? fieldChildren.length : 1;
|
|
370
440
|
break;
|
|
441
|
+
}
|
|
371
442
|
}
|
|
372
443
|
}
|
|
373
444
|
|
|
@@ -378,7 +449,8 @@ function normalizeImageNode(
|
|
|
378
449
|
node: ParsedImageNode,
|
|
379
450
|
state: NormalizationState,
|
|
380
451
|
): InlineNode {
|
|
381
|
-
|
|
452
|
+
const existingMediaItem = state.media.items[node.mediaId];
|
|
453
|
+
if (!existingMediaItem) {
|
|
382
454
|
const packagePartName =
|
|
383
455
|
typeof node.packagePartName === "string" && node.packagePartName.length > 0
|
|
384
456
|
? node.packagePartName
|
|
@@ -394,6 +466,17 @@ function normalizeImageNode(
|
|
|
394
466
|
packagePartName,
|
|
395
467
|
...(node.relationshipId ? { relationshipId: node.relationshipId } : {}),
|
|
396
468
|
...(node.altText ? { altText: node.altText } : {}),
|
|
469
|
+
...(node.widthEmu !== undefined ? { widthEmu: node.widthEmu } : {}),
|
|
470
|
+
...(node.heightEmu !== undefined ? { heightEmu: node.heightEmu } : {}),
|
|
471
|
+
};
|
|
472
|
+
} else if (
|
|
473
|
+
node.widthEmu !== undefined ||
|
|
474
|
+
node.heightEmu !== undefined
|
|
475
|
+
) {
|
|
476
|
+
state.media.items[node.mediaId] = {
|
|
477
|
+
...existingMediaItem,
|
|
478
|
+
...(node.widthEmu !== undefined ? { widthEmu: node.widthEmu } : {}),
|
|
479
|
+
...(node.heightEmu !== undefined ? { heightEmu: node.heightEmu } : {}),
|
|
397
480
|
};
|
|
398
481
|
}
|
|
399
482
|
|
|
@@ -521,3 +604,32 @@ function recordOpaqueFragment(
|
|
|
521
604
|
warningId,
|
|
522
605
|
};
|
|
523
606
|
}
|
|
607
|
+
|
|
608
|
+
/**
 * Extract text content from field contentXml to populate the field's children
 * array. This enables cross-reference and TOC content to be visible in the
 * canonical model and surface projection.
 *
 * Each non-empty `<w:t>…</w:t>` element becomes one `text` node, in document
 * order, with XML character and entity references decoded. Self-closing
 * `<w:t/>` elements carry no text and are skipped.
 *
 * @param contentXml Raw XML of the field's content runs; may be undefined.
 * @returns Text nodes extracted from the XML, or an empty array when the input
 *   is missing, blank, or contains no non-empty `<w:t>` elements.
 */
function normalizeFieldContentXml(contentXml: string | undefined): InlineNode[] {
  if (!contentXml || contentXml.trim().length === 0) {
    return [];
  }

  // Extract text from <w:t> elements within the content runs. The `(?<!\/)`
  // guard rejects self-closing `<w:t/>`, which would otherwise be treated as
  // an opening tag and swallow all markup up to the next `</w:t>`.
  const textPattern = /<w:t\b[^>]*(?<!\/)>([\s\S]*?)<\/w:t>/g;
  const children: InlineNode[] = [];

  for (const match of contentXml.matchAll(textPattern)) {
    const text = decodeFieldTextXmlEntities(match[1] ?? "");
    if (text.length > 0) {
      children.push({ type: "text", text });
    }
  }

  return children;
}

/**
 * Decodes the five predefined XML entities and numeric character references.
 *
 * Decoding happens in a single pass so that already-decoded output is never
 * re-scanned (`&amp;lt;` yields the literal text `&lt;`, not `<`).
 * Numeric references outside the Unicode range are left untouched.
 */
function decodeFieldTextXmlEntities(raw: string): string {
  return raw.replace(
    /&(amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);/g,
    (entity: string, name: string): string => {
      switch (name) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return '"';
        case "apos":
          return "'";
        default: {
          const codePoint = name.startsWith("#x")
            ? Number.parseInt(name.slice(2), 16)
            : Number.parseInt(name.slice(1), 10);
          // String.fromCodePoint throws above U+10FFFF; keep such input verbatim.
          return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
        }
      }
    },
  );
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/** Highlight color names mapped to six-digit uppercase hex RGB strings (no `#`). */
export const HIGHLIGHT_COLOR_MAP = {
  black: "000000",
  blue: "0000FF",
  cyan: "00FFFF",
  darkBlue: "000080",
  darkCyan: "008080",
  darkGray: "808080",
  darkGreen: "008000",
  darkMagenta: "800080",
  darkRed: "8B0000",
  darkYellow: "808000",
  green: "00FF00",
  lightGray: "C0C0C0",
  magenta: "FF00FF",
  red: "FF0000",
  white: "FFFFFF",
  yellow: "FFFF00",
} as const;

/** Any key of {@link HIGHLIGHT_COLOR_MAP}. */
export type HighlightColorName = keyof typeof HIGHLIGHT_COLOR_MAP;

/**
 * Type guard: `true` only when `value` is an own key of the color map.
 *
 * A plain `MAP[value]` lookup would also find inherited members such as
 * `constructor`, `toString`, or `__proto__`, which are not highlight colors.
 */
function isHighlightColorName(value: string): value is HighlightColorName {
  return Object.prototype.hasOwnProperty.call(HIGHLIGHT_COLOR_MAP, value);
}

/**
 * Resolves a highlight color name to its hex color.
 *
 * @param value Highlight name to resolve; matching is case-sensitive.
 * @returns The hex color together with the validated name, or `undefined`
 *   when `value` is null/undefined/empty, is `"none"`, or is not a key of
 *   {@link HIGHLIGHT_COLOR_MAP}.
 */
export function resolveHighlightColor(
  value: string | null | undefined,
): { color: string; val: HighlightColorName } | undefined {
  if (!value || value === "none") {
    return undefined;
  }

  if (!isHighlightColorName(value)) {
    return undefined;
  }

  return {
    color: HIGHLIGHT_COLOR_MAP[value],
    val: value,
  };
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type { NumberingCatalog } from "../../model/canonical-document.ts";
|
|
2
|
+
|
|
3
|
+
export const DOCX_NULL_NUMBERING_INSTANCE_ID = "num:0";
|
|
4
|
+
export const DOCX_NULL_ABSTRACT_NUMBERING_ID = "abstract-num:__docx-import-null__";
|
|
5
|
+
|
|
6
|
+
/**
 * Builds the synthetic "null" numbering entries: one abstract definition with
 * nine levels (0–8), each using format `"none"` and empty text, plus one
 * instance that points at that definition and carries no overrides.
 *
 * A fresh object graph is created on every call.
 *
 * @returns The `abstractDefinitions` and `instances` maps, each holding the
 *   single synthetic entry keyed by its own id.
 */
export function createSyntheticDocxNullNumberingCatalog(): Pick<
  NumberingCatalog,
  "abstractDefinitions" | "instances"
> {
  const nullAbstractDefinition: NumberingCatalog["abstractDefinitions"][string] = {
    abstractNumberingId: DOCX_NULL_ABSTRACT_NUMBERING_ID,
    // Array(9).keys() yields the level indices 0 through 8.
    levels: Array.from(Array(9).keys(), (level) => ({
      level,
      format: "none",
      text: "",
    })),
  };

  const nullInstance: NumberingCatalog["instances"][string] = {
    numberingInstanceId: DOCX_NULL_NUMBERING_INSTANCE_ID,
    abstractNumberingId: DOCX_NULL_ABSTRACT_NUMBERING_ID,
    overrides: [],
  };

  return {
    abstractDefinitions: { [DOCX_NULL_ABSTRACT_NUMBERING_ID]: nullAbstractDefinition },
    instances: { [DOCX_NULL_NUMBERING_INSTANCE_ID]: nullInstance },
  };
}
|
|
30
|
+
|
|
31
|
+
/**
 * Tells whether an abstract numbering definition is the synthetic "null"
 * placeholder, judged solely by its `abstractNumberingId`.
 *
 * @param definition Abstract numbering definition to test.
 * @returns `true` when the id equals `DOCX_NULL_ABSTRACT_NUMBERING_ID`.
 */
export function isSyntheticDocxNullAbstractDefinition(
  definition: NumberingCatalog["abstractDefinitions"][string],
): boolean {
  const { abstractNumberingId } = definition;
  return abstractNumberingId === DOCX_NULL_ABSTRACT_NUMBERING_ID;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Tells whether a numbering instance is the synthetic "null" placeholder.
 * Both ids must match: the instance id must be
 * `DOCX_NULL_NUMBERING_INSTANCE_ID` and it must reference
 * `DOCX_NULL_ABSTRACT_NUMBERING_ID`.
 *
 * @param instance Numbering instance to test.
 * @returns `true` only when both ids match the synthetic sentinels.
 */
export function isSyntheticDocxNullNumberingInstance(
  instance: NumberingCatalog["instances"][string],
): boolean {
  if (instance.numberingInstanceId !== DOCX_NULL_NUMBERING_INSTANCE_ID) {
    return false;
  }
  return instance.abstractNumberingId === DOCX_NULL_ABSTRACT_NUMBERING_ID;
}
|