@beyondwork/docx-react-component 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/README.md +8 -2
  2. package/package.json +24 -34
  3. package/src/api/README.md +5 -1
  4. package/src/api/public-types.ts +710 -4
  5. package/src/api/session-state.ts +60 -0
  6. package/src/core/commands/formatting-commands.ts +2 -1
  7. package/src/core/commands/image-commands.ts +147 -0
  8. package/src/core/commands/index.ts +19 -3
  9. package/src/core/commands/list-commands.ts +231 -36
  10. package/src/core/commands/paragraph-layout-commands.ts +339 -0
  11. package/src/core/commands/section-layout-commands.ts +680 -0
  12. package/src/core/commands/style-commands.ts +262 -0
  13. package/src/core/search/search-text.ts +357 -0
  14. package/src/core/selection/mapping.ts +41 -0
  15. package/src/core/state/editor-state.ts +4 -1
  16. package/src/index.ts +51 -0
  17. package/src/io/docx-session.ts +623 -56
  18. package/src/io/export/serialize-comments.ts +104 -34
  19. package/src/io/export/serialize-footnotes.ts +198 -1
  20. package/src/io/export/serialize-headers-footers.ts +203 -10
  21. package/src/io/export/serialize-main-document.ts +285 -8
  22. package/src/io/export/serialize-numbering.ts +28 -7
  23. package/src/io/export/split-review-boundaries.ts +181 -19
  24. package/src/io/normalize/normalize-text.ts +144 -32
  25. package/src/io/ooxml/highlight-colors.ts +39 -0
  26. package/src/io/ooxml/numbering-sentinels.ts +44 -0
  27. package/src/io/ooxml/parse-comments.ts +85 -19
  28. package/src/io/ooxml/parse-fields.ts +396 -0
  29. package/src/io/ooxml/parse-footnotes.ts +452 -22
  30. package/src/io/ooxml/parse-headers-footers.ts +657 -29
  31. package/src/io/ooxml/parse-inline-media.ts +30 -0
  32. package/src/io/ooxml/parse-main-document.ts +807 -20
  33. package/src/io/ooxml/parse-numbering.ts +7 -0
  34. package/src/io/ooxml/parse-revisions.ts +317 -38
  35. package/src/io/ooxml/parse-settings.ts +184 -0
  36. package/src/io/ooxml/parse-shapes.ts +25 -0
  37. package/src/io/ooxml/parse-styles.ts +463 -0
  38. package/src/io/ooxml/parse-theme.ts +32 -0
  39. package/src/legal/bookmarks.ts +44 -0
  40. package/src/legal/cross-references.ts +59 -1
  41. package/src/model/canonical-document.ts +250 -4
  42. package/src/model/cds-1.0.0.ts +13 -0
  43. package/src/model/snapshot.ts +87 -2
  44. package/src/review/store/revision-store.ts +6 -0
  45. package/src/review/store/revision-types.ts +1 -0
  46. package/src/runtime/document-layout.ts +332 -0
  47. package/src/runtime/document-navigation.ts +603 -0
  48. package/src/runtime/document-runtime.ts +1754 -78
  49. package/src/runtime/document-search.ts +145 -0
  50. package/src/runtime/numbering-prefix.ts +47 -26
  51. package/src/runtime/page-layout-estimation.ts +212 -0
  52. package/src/runtime/read-only-diagnostics-runtime.ts +9 -0
  53. package/src/runtime/session-capabilities.ts +35 -3
  54. package/src/runtime/story-context.ts +164 -0
  55. package/src/runtime/story-targeting.ts +162 -0
  56. package/src/runtime/surface-projection.ts +324 -36
  57. package/src/runtime/table-schema.ts +89 -7
  58. package/src/runtime/view-state.ts +477 -0
  59. package/src/runtime/workflow-markup.ts +349 -0
  60. package/src/ui/WordReviewEditor.tsx +2469 -1344
  61. package/src/ui/browser-export.ts +52 -0
  62. package/src/ui/editor-command-bag.ts +120 -0
  63. package/src/ui/editor-runtime-boundary.ts +1422 -0
  64. package/src/ui/editor-shell-view.tsx +134 -0
  65. package/src/ui/editor-surface-controller.tsx +51 -0
  66. package/src/ui/headless/preserve-editor-selection.ts +5 -0
  67. package/src/ui/headless/revision-decoration-model.ts +4 -4
  68. package/src/ui/headless/selection-helpers.ts +20 -0
  69. package/src/ui/headless/selection-toolbar-model.ts +22 -0
  70. package/src/ui/headless/use-editor-keyboard.ts +6 -1
  71. package/src/ui/runtime-snapshot-selectors.ts +197 -0
  72. package/src/ui-tailwind/chrome/tw-alert-banner.tsx +18 -2
  73. package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +129 -0
  74. package/src/ui-tailwind/chrome/tw-layout-panel.tsx +114 -0
  75. package/src/ui-tailwind/chrome/tw-object-context-toolbar.tsx +34 -0
  76. package/src/ui-tailwind/chrome/tw-page-ruler.tsx +386 -0
  77. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +150 -14
  78. package/src/ui-tailwind/chrome/tw-table-context-toolbar.tsx +128 -0
  79. package/src/ui-tailwind/editor-surface/perf-probe.ts +179 -0
  80. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +46 -7
  81. package/src/ui-tailwind/editor-surface/pm-contextual-ui.ts +31 -0
  82. package/src/ui-tailwind/editor-surface/pm-decorations.ts +35 -0
  83. package/src/ui-tailwind/editor-surface/pm-position-map.ts +3 -3
  84. package/src/ui-tailwind/editor-surface/pm-schema.ts +186 -13
  85. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +191 -68
  86. package/src/ui-tailwind/editor-surface/search-plugin.ts +19 -68
  87. package/src/ui-tailwind/editor-surface/surface-build-keys.ts +51 -0
  88. package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +11 -0
  89. package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +7 -1
  90. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +528 -85
  91. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +0 -1
  92. package/src/ui-tailwind/index.ts +2 -1
  93. package/src/ui-tailwind/page-chrome-model.ts +27 -0
  94. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +277 -147
  95. package/src/ui-tailwind/review/tw-health-panel.tsx +31 -2
  96. package/src/ui-tailwind/review/tw-review-rail.tsx +8 -8
  97. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +15 -15
  98. package/src/ui-tailwind/theme/editor-theme.css +127 -0
  99. package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +4 -0
  100. package/src/ui-tailwind/toolbar/tw-toolbar.tsx +829 -12
  101. package/src/ui-tailwind/tw-review-workspace.tsx +1238 -42
  102. package/src/validation/compatibility-engine.ts +119 -24
  103. package/src/validation/compatibility-report.ts +1 -0
  104. package/src/validation/diagnostics.ts +1 -0
  105. package/src/validation/docx-comment-proof.ts +707 -0
@@ -1,15 +1,23 @@
1
1
  import type { NumberingCatalog, ParagraphNode } from "../../model/canonical-document.ts";
2
+ import {
3
+ isSyntheticDocxNullAbstractDefinition,
4
+ isSyntheticDocxNullNumberingInstance,
5
+ } from "../ooxml/numbering-sentinels.ts";
2
6
 
3
7
  export const WORD_NUMBERING_CONTENT_TYPE =
4
8
  "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml";
5
9
 
6
10
  export function serializeNumberingXml(catalog: NumberingCatalog): string {
7
- const abstractDefinitions = Object.values(catalog.abstractDefinitions).sort((left, right) =>
8
- compareSerializedIds(left.abstractNumberingId, right.abstractNumberingId),
9
- );
10
- const instances = Object.values(catalog.instances).sort((left, right) =>
11
- compareSerializedIds(left.numberingInstanceId, right.numberingInstanceId),
12
- );
11
+ const abstractDefinitions = Object.values(catalog.abstractDefinitions)
12
+ .filter((definition) => !isSyntheticDocxNullAbstractDefinition(definition))
13
+ .sort((left, right) =>
14
+ compareSerializedIds(left.abstractNumberingId, right.abstractNumberingId),
15
+ );
16
+ const instances = Object.values(catalog.instances)
17
+ .filter((instance) => !isSyntheticDocxNullNumberingInstance(instance))
18
+ .sort((left, right) =>
19
+ compareSerializedIds(left.numberingInstanceId, right.numberingInstanceId),
20
+ );
13
21
 
14
22
  const body = [
15
23
  ...abstractDefinitions.map((definition) => serializeAbstractDefinition(definition)),
@@ -22,6 +30,17 @@ export function serializeNumberingXml(catalog: NumberingCatalog): string {
22
30
  ].join("\n");
23
31
  }
24
32
 
33
/**
 * Reports whether the catalog contains anything that would be written out.
 *
 * An entry counts when it is not one of the synthetic DOCX "null" sentinels
 * (as decided by `isSyntheticDocxNullAbstractDefinition` and
 * `isSyntheticDocxNullNumberingInstance`).
 *
 * @param catalog Numbering catalog to inspect.
 * @returns `true` if at least one abstract definition or numbering instance
 *   is not a synthetic sentinel, otherwise `false`.
 */
export function hasSerializableNumberingEntries(catalog: NumberingCatalog): boolean {
  for (const definition of Object.values(catalog.abstractDefinitions)) {
    if (!isSyntheticDocxNullAbstractDefinition(definition)) {
      return true;
    }
  }

  for (const instance of Object.values(catalog.instances)) {
    if (!isSyntheticDocxNullNumberingInstance(instance)) {
      return true;
    }
  }

  return false;
}
43
+
25
44
  export function serializeParagraphNumberingProperties(
26
45
  numbering: ParagraphNode["numbering"],
27
46
  ): string {
@@ -51,10 +70,12 @@ function serializeLevel(level: NumberingCatalog["abstractDefinitions"][string]["
51
70
  const paragraphStyle = level.paragraphStyleId
52
71
  ? `<w:pStyle w:val="${escapeAttribute(level.paragraphStyleId)}"/>`
53
72
  : "";
73
+ const isLegal = level.isLegalNumbering ? "<w:isLgl/>" : "";
74
+ const suffix = level.suffix ? `<w:suff w:val="${escapeAttribute(level.suffix)}"/>` : "";
54
75
 
55
76
  return `<w:lvl w:ilvl="${level.level}">${start}<w:numFmt w:val="${escapeAttribute(
56
77
  level.format,
57
- )}"/><w:lvlText w:val="${escapeAttribute(level.text)}"/>${paragraphStyle}</w:lvl>`;
78
+ )}"/><w:lvlText w:val="${escapeAttribute(level.text)}"/>${paragraphStyle}${isLegal}${suffix}</w:lvl>`;
58
79
  }
59
80
 
60
81
  function serializeInstance(instance: NumberingCatalog["instances"][string]): string {
@@ -18,25 +18,9 @@ export function splitDocumentAtReviewBoundaries(
18
18
  return content;
19
19
  }
20
20
 
21
- let cursor = 0;
22
- const children = content.children.map((block, index) => {
23
- if (index > 0 && content.children[index - 1]?.type === "paragraph" && block.type === "paragraph") {
24
- cursor += 1;
25
- }
26
-
27
- if (block.type !== "paragraph") {
28
- cursor += 1;
29
- return block;
30
- }
31
-
32
- const next = splitParagraph(block, splitPositions, cursor);
33
- cursor = next.cursor;
34
- return next.paragraph;
35
- });
36
-
37
21
  return {
38
22
  type: "doc",
39
- children,
23
+ children: splitBlockNodes(content.children, splitPositions, 0, true).children,
40
24
  };
41
25
  }
42
26
 
@@ -98,7 +82,7 @@ function splitParagraph(
98
82
  }
99
83
 
100
84
  children.push(child);
101
- nextCursor += 1;
85
+ nextCursor += measureInlineNodeForReviewBoundaries(child);
102
86
  }
103
87
 
104
88
  return {
@@ -110,6 +94,147 @@ function splitParagraph(
110
94
  };
111
95
  }
112
96
 
97
/**
 * Splits every block in `blocks` at the requested review-boundary positions
 * while threading a running cursor through them.
 *
 * - Paragraphs are delegated to `splitParagraph`; when
 *   `useSurfaceParagraphSeparators` is set, every paragraph except the one at
 *   index 0 is preceded by a one-position separator.
 * - Tables are delegated to `splitTableAtReviewBoundaries`.
 * - `sdt` blocks are recursed into with separators disabled.
 * - Every other block (including `custom_xml`) is kept as-is and advances the
 *   cursor by exactly one position.
 *
 * @param blocks Blocks to process, in document order.
 * @param splitPositions Cursor positions at which content must be split.
 * @param cursor Cursor value at the start of `blocks`.
 * @param useSurfaceParagraphSeparators Whether paragraphs after the first
 *   consume one separator position before their content.
 * @returns The rewritten blocks and the cursor value after the last block.
 */
function splitBlockNodes(
  blocks: readonly DocumentRootNode["children"][number][],
  splitPositions: ReadonlySet<number>,
  cursor: number,
  useSurfaceParagraphSeparators: boolean,
): {
  children: DocumentRootNode["children"];
  cursor: number;
} {
  const output: DocumentRootNode["children"] = [];
  let position = cursor;

  blocks.forEach((block, index) => {
    switch (block.type) {
      case "paragraph": {
        const start =
          useSurfaceParagraphSeparators && index > 0 ? position + 1 : position;
        const result = splitParagraph(block, splitPositions, start);
        output.push(result.paragraph);
        position = result.cursor;
        break;
      }
      case "table": {
        const result = splitTableAtReviewBoundaries(block, splitPositions, position);
        output.push(result.table);
        position = result.cursor;
        break;
      }
      case "sdt": {
        const result = splitBlockNodes(block.children, splitPositions, position, false);
        output.push({
          ...block,
          children: result.children,
        });
        position = result.cursor;
        break;
      }
      default: {
        // custom_xml and all remaining block kinds occupy a single position
        // and are passed through untouched.
        output.push(block);
        position += 1;
      }
    }
  });

  return {
    children: output,
    cursor: position,
  };
}
151
+
152
/**
 * Applies review-boundary splitting to the content of every cell of a table.
 *
 * Cells are visited row by row, left to right; the cursor produced by one
 * cell is the starting cursor of the next. Cell content is processed through
 * `splitBlockNodes` with paragraph separators disabled.
 *
 * @param table Table block whose cells should be processed.
 * @param splitPositions Cursor positions at which content must be split.
 * @param cursor Cursor value at the start of the table.
 * @returns A copy of the table with rewritten cells, and the cursor value
 *   after the last cell.
 */
function splitTableAtReviewBoundaries(
  table: Extract<DocumentRootNode["children"][number], { type: "table" }>,
  splitPositions: ReadonlySet<number>,
  cursor: number,
): {
  table: Extract<DocumentRootNode["children"][number], { type: "table" }>;
  cursor: number;
} {
  let position = cursor;

  // Shared by all rows so the cursor carries over from cell to cell.
  const splitCell = (cell: (typeof table.rows)[number]["cells"][number]) => {
    const result = splitBlockNodes(cell.children, splitPositions, position, false);
    position = result.cursor;
    return {
      ...cell,
      children: result.children,
    };
  };

  const rows = table.rows.map((row) => ({
    ...row,
    cells: row.cells.map((cell) => splitCell(cell)),
  }));

  return {
    table: {
      ...table,
      rows,
    },
    cursor: position,
  };
}
181
+
182
/**
 * Computes the cursor value after a table without modifying it.
 *
 * Each cell's blocks are measured with
 * `measureBlockNodesForReviewBoundaries` (paragraph separators disabled),
 * row by row and cell by cell, feeding each result into the next cell.
 *
 * @param table Table block to measure.
 * @param cursor Cursor value at the start of the table.
 * @returns The cursor value after the table's last cell.
 */
function advanceCursorThroughTable(
  table: Extract<DocumentRootNode["children"][number], { type: "table" }>,
  cursor: number,
): number {
  return table.rows.reduce(
    (afterPreviousRow, row) =>
      row.cells.reduce(
        (afterPreviousCell, cell) =>
          measureBlockNodesForReviewBoundaries(cell.children, afterPreviousCell, false),
        afterPreviousRow,
      ),
    cursor,
  );
}
194
+
195
/**
 * Advances a cursor across a list of blocks without rewriting them.
 *
 * - A paragraph contributes the summed width of its inline children (see
 *   `measureInlineNodeForReviewBoundaries`), plus one separator position when
 *   `useSurfaceParagraphSeparators` is set and the paragraph is not at
 *   index 0.
 * - A table is measured through `advanceCursorThroughTable`.
 * - An `sdt` block is measured recursively with separators disabled.
 * - Every other block (including `custom_xml`) contributes one position.
 *
 * @param blocks Blocks to measure, in document order.
 * @param cursor Cursor value at the start of `blocks`.
 * @param useSurfaceParagraphSeparators Whether paragraphs after the first
 *   consume one separator position.
 * @returns The cursor value after the last block.
 */
function measureBlockNodesForReviewBoundaries(
  blocks: readonly DocumentRootNode["children"][number][],
  cursor: number,
  useSurfaceParagraphSeparators: boolean,
): number {
  return blocks.reduce<number>((position, block, index) => {
    switch (block.type) {
      case "paragraph": {
        const separator = useSurfaceParagraphSeparators && index > 0 ? 1 : 0;
        let contentWidth = 0;
        for (const child of block.children) {
          contentWidth += measureInlineNodeForReviewBoundaries(child);
        }
        return position + separator + contentWidth;
      }
      case "table":
        return advanceCursorThroughTable(block, position);
      case "sdt":
        return measureBlockNodesForReviewBoundaries(block.children, position, false);
      default:
        // custom_xml and all remaining block kinds occupy a single position.
        return position + 1;
    }
  }, cursor);
}
237
+
113
238
  function splitHyperlinkNode(
114
239
  node: HyperlinkNode,
115
240
  splitPositions: ReadonlySet<number>,
@@ -132,7 +257,7 @@ function splitHyperlinkNode(
132
257
  }
133
258
 
134
259
  groups[groups.length - 1]?.push(child);
135
- nextCursor += 1;
260
+ nextCursor += measureInlineNodeForReviewBoundaries(child);
136
261
  if (splitPositions.has(nextCursor)) {
137
262
  groups.push([]);
138
263
  }
@@ -192,3 +317,40 @@ function splitTextNode(
192
317
  cursor: cursor + codepoints.length,
193
318
  };
194
319
  }
320
+
321
/**
 * Returns how many cursor positions an inline node occupies.
 *
 * - `text`: number of Unicode code points in the text.
 * - `bookmark_start` / `bookmark_end`: zero.
 * - `hyperlink`: sum of its children's widths.
 * - `field`: sum of its children's widths, or one when that sum is zero.
 * - anything else (tabs, breaks, images, shapes, ...): one.
 *
 * @param node Inline node to measure.
 * @returns The node's width in cursor positions.
 */
function measureInlineNodeForReviewBoundaries(node: InlineNode): number {
  if (node.type === "text") {
    // Array.from iterates by code point, so surrogate pairs count once.
    return Array.from(node.text).length;
  }

  if (node.type === "bookmark_start" || node.type === "bookmark_end") {
    return 0;
  }

  if (node.type === "hyperlink" || node.type === "field") {
    let width = 0;
    for (const child of node.children) {
      width += measureInlineNodeForReviewBoundaries(child);
    }
    if (node.type === "field") {
      // A field without measurable content still occupies one position.
      return width > 0 ? width : 1;
    }
    return width;
  }

  return 1;
}
@@ -4,6 +4,7 @@ import type {
4
4
  CustomXmlNode,
5
5
  DiagnosticStore,
6
6
  DocumentRootNode,
7
+ FieldRegistry,
7
8
  InlineNode,
8
9
  MediaCatalog,
9
10
  OpaqueBlockNode,
@@ -11,6 +12,7 @@ import type {
11
12
  OpaqueInlineNode,
12
13
  ParagraphNode,
13
14
  PreservationStore,
15
+ SectionBreakNode,
14
16
  TableCellNode,
15
17
  TableNode,
16
18
  TableRowNode,
@@ -27,17 +29,22 @@ import type {
27
29
  ParsedImageNode,
28
30
  ParsedMainDocument,
29
31
  ParsedParagraphNode,
32
+ ParsedSectionBreakNode,
30
33
  ParsedSdtNode,
31
34
  ParsedTableBlockNode,
32
35
  ParsedTableCellNode,
33
36
  ParsedTableRowNode,
34
37
  } from "../ooxml/parse-main-document.ts";
38
+ import { classifyFieldInstruction, buildFieldRegistry } from "../ooxml/parse-fields.ts";
35
39
 
36
40
  export interface NormalizedTextDocument {
37
41
  content: DocumentRootNode;
38
42
  media: MediaCatalog;
39
43
  preservation: PreservationStore;
40
44
  diagnostics: DiagnosticStore;
45
+ finalSectionProperties?: ParsedMainDocument["finalSectionProperties"];
46
+ /** Package-backed field registry built during normalization. */
47
+ fieldRegistry?: FieldRegistry;
41
48
  }
42
49
 
43
50
  interface NormalizationState {
@@ -53,6 +60,7 @@ interface NormalizationState {
53
60
  export function normalizeParsedTextDocument(
54
61
  document: ParsedMainDocument,
55
62
  packagePartName = "/word/document.xml",
63
+ options?: { styles?: import("../../model/canonical-document.ts").StylesCatalog },
56
64
  ): NormalizedTextDocument {
57
65
  const state: NormalizationState = {
58
66
  nextFragmentIndex: 1,
@@ -72,61 +80,82 @@ export function normalizeParsedTextDocument(
72
80
  },
73
81
  };
74
82
 
75
- const children = document.blocks.map((block, index) => {
76
- if (
77
- index > 0 &&
78
- document.blocks[index - 1]?.type === "paragraph" &&
79
- block.type === "paragraph"
80
- ) {
81
- state.cursor += 1;
83
+ const children: BlockNode[] = [];
84
+ let previousParagraph = false;
85
+
86
+ for (const block of document.blocks) {
87
+ const normalizedBlocks = normalizeBlocks(block, state, packagePartName);
88
+ for (const normalizedBlock of normalizedBlocks) {
89
+ if (previousParagraph && normalizedBlock.type === "paragraph") {
90
+ state.cursor += 1;
91
+ }
92
+ children.push(normalizedBlock);
93
+ previousParagraph = normalizedBlock.type === "paragraph";
82
94
  }
95
+ }
83
96
 
84
- return normalizeBlock(block, state, packagePartName);
85
- });
97
+ const content: DocumentRootNode = { type: "doc", children };
98
+
99
+ // Build the field registry from normalized content.
100
+ // When styles are available, the registry includes full TOC heading resolution.
101
+ // Without styles, it still catalogs all field instances for the supported/preserve-only partition.
102
+ const styles = options?.styles ?? { paragraphs: {}, characters: {}, tables: {} };
103
+ const fieldRegistry = buildFieldRegistry({ content, styles });
104
+ const hasFields = fieldRegistry.supported.length > 0 || fieldRegistry.preserveOnly.length > 0;
86
105
 
87
106
  return {
88
- content: {
89
- type: "doc",
90
- children,
91
- },
107
+ content,
92
108
  media: state.media,
93
109
  preservation: state.preservation,
94
110
  diagnostics: state.diagnostics,
111
+ ...(document.finalSectionProperties !== undefined
112
+ ? { finalSectionProperties: document.finalSectionProperties }
113
+ : {}),
114
+ ...(hasFields ? { fieldRegistry } : {}),
95
115
  };
96
116
  }
97
117
 
98
- function normalizeBlock(
118
+ function normalizeBlocks(
99
119
  block: ParsedBlockNode,
100
120
  state: NormalizationState,
101
121
  packagePartName: string,
102
- ): BlockNode {
122
+ ): BlockNode[] {
103
123
  if (block.type === "opaque_block") {
104
124
  const opaque = recordOpaqueFragment("opaque_block", block.rawXml, state, packagePartName);
105
125
  state.cursor += 1;
106
- return {
107
- type: "opaque_block",
108
- fragmentId: opaque.fragmentId,
109
- warningId: opaque.warningId,
110
- };
126
+ return [
127
+ {
128
+ type: "opaque_block",
129
+ fragmentId: opaque.fragmentId,
130
+ warningId: opaque.warningId,
131
+ },
132
+ ];
111
133
  }
112
134
 
113
135
  if (block.type === "table") {
114
- return normalizeTable(block, state, packagePartName);
136
+ return [normalizeTable(block, state, packagePartName)];
115
137
  }
116
138
 
117
139
  if (block.type === "sdt") {
118
- return normalizeSdt(block, state, packagePartName);
140
+ return [normalizeSdt(block, state, packagePartName)];
119
141
  }
120
142
 
121
143
  if (block.type === "custom_xml") {
122
- return normalizeCustomXml(block, state, packagePartName);
144
+ return [normalizeCustomXml(block, state, packagePartName)];
123
145
  }
124
146
 
125
147
  if (block.type === "alt_chunk") {
126
- return normalizeAltChunk(block, state);
148
+ return [normalizeAltChunk(block, state)];
127
149
  }
128
150
 
129
- return normalizeParagraph(block, state, packagePartName);
151
+ if (block.type === "section_break") {
152
+ return [normalizeSectionBreak(block)];
153
+ }
154
+
155
+ const normalizedParagraph = normalizeParagraph(block, state, packagePartName);
156
+ return block.sectionProperties
157
+ ? [normalizedParagraph, normalizeInlineSectionBreak(block)]
158
+ : [normalizedParagraph];
130
159
  }
131
160
 
132
161
  function normalizeParagraph(
@@ -141,6 +170,9 @@ function normalizeParagraph(
141
170
  ...(paragraph.numbering ? { numbering: paragraph.numbering } : {}),
142
171
  ...(paragraph.alignment ? { alignment: paragraph.alignment } : {}),
143
172
  ...(paragraph.spacing ? { spacing: paragraph.spacing } : {}),
173
+ ...(paragraph.contextualSpacing !== undefined
174
+ ? { contextualSpacing: paragraph.contextualSpacing }
175
+ : {}),
144
176
  ...(paragraph.indentation ? { indentation: paragraph.indentation } : {}),
145
177
  ...(paragraph.tabStops && paragraph.tabStops.length > 0 ? { tabStops: paragraph.tabStops } : {}),
146
178
  ...(paragraph.keepNext ? { keepNext: paragraph.keepNext } : {}),
@@ -190,7 +222,7 @@ function normalizeTableCell(
190
222
  ): TableCellNode {
191
223
  const children: BlockNode[] = [];
192
224
  for (const block of cell.children) {
193
- children.push(normalizeBlock(block, state, packagePartName));
225
+ children.push(...normalizeBlocks(block, state, packagePartName));
194
226
  }
195
227
  // Ensure at least one child (OOXML requires at least one <w:p> per cell)
196
228
  if (children.length === 0) {
@@ -213,7 +245,7 @@ function normalizeSdt(
213
245
  return {
214
246
  type: "sdt",
215
247
  properties: { ...block.properties },
216
- children: block.children.map((child) => normalizeBlock(child, state, packagePartName)),
248
+ children: block.children.flatMap((child) => normalizeBlocks(child, state, packagePartName)),
217
249
  };
218
250
  }
219
251
 
@@ -226,7 +258,27 @@ function normalizeCustomXml(
226
258
  type: "custom_xml",
227
259
  ...(block.uri ? { uri: block.uri } : {}),
228
260
  ...(block.element ? { element: block.element } : {}),
229
- children: block.children.map((child) => normalizeBlock(child, state, packagePartName)),
261
+ children: block.children.flatMap((child) => normalizeBlocks(child, state, packagePartName)),
262
+ };
263
+ }
264
+
265
/**
 * Converts a parsed section-break block into its canonical node by copying
 * the raw section-properties XML and the parsed section properties.
 *
 * @param block Parsed section-break block.
 * @returns The canonical `section_break` node.
 */
function normalizeSectionBreak(block: ParsedSectionBreakNode): SectionBreakNode {
  const { sectionPropertiesXml, sectionProperties } = block;
  return { type: "section_break", sectionPropertiesXml, sectionProperties };
}
272
+
273
+ function normalizeInlineSectionBreak(
274
+ paragraph: ParsedParagraphNode,
275
+ ): SectionBreakNode {
276
+ return {
277
+ type: "section_break",
278
+ ...(paragraph.sectionPropertiesXml
279
+ ? { sectionPropertiesXml: paragraph.sectionPropertiesXml }
280
+ : {}),
281
+ sectionProperties: paragraph.sectionProperties!,
230
282
  };
231
283
  }
232
284
 
@@ -324,6 +376,7 @@ function normalizeInlineChildren(
324
376
  type: "shape",
325
377
  ...(node.text ? { text: node.text } : {}),
326
378
  ...(node.geometry ? { geometry: node.geometry } : {}),
379
+ ...(node.isTextBox ? { isTextBox: true } : {}),
327
380
  rawXml: node.rawXml,
328
381
  });
329
382
  state.cursor += 1;
@@ -359,15 +412,33 @@ function normalizeInlineChildren(
359
412
  bookmarkId: node.bookmarkId,
360
413
  });
361
414
  break;
362
- case "field":
415
+ case "footnote_ref":
416
+ normalized.push({
417
+ type: "footnote_ref",
418
+ noteId: node.noteId,
419
+ noteKind: node.noteKind,
420
+ });
421
+ state.cursor += 1;
422
+ break;
423
+ case "field": {
424
+ const classification = classifyFieldInstruction(node.instruction);
425
+ const cursorBeforeField = state.cursor;
426
+ const fieldChildren = node.children
427
+ ? normalizeInlineChildren(node.children, state, packagePartName)
428
+ : normalizeFieldContentXml(node.contentXml ?? "");
429
+ state.cursor = cursorBeforeField;
363
430
  normalized.push({
364
431
  type: "field",
365
432
  fieldType: node.fieldType,
366
433
  instruction: node.instruction,
367
- children: [],
434
+ children: fieldChildren,
435
+ fieldFamily: classification.family,
436
+ ...(classification.target ? { fieldTarget: classification.target } : {}),
437
+ refreshStatus: classification.supported ? "stale" : "preserve-only",
368
438
  });
369
- state.cursor += 1;
439
+ state.cursor += fieldChildren.length > 0 ? fieldChildren.length : 1;
370
440
  break;
441
+ }
371
442
  }
372
443
  }
373
444
 
@@ -378,7 +449,8 @@ function normalizeImageNode(
378
449
  node: ParsedImageNode,
379
450
  state: NormalizationState,
380
451
  ): InlineNode {
381
- if (!state.media.items[node.mediaId]) {
452
+ const existingMediaItem = state.media.items[node.mediaId];
453
+ if (!existingMediaItem) {
382
454
  const packagePartName =
383
455
  typeof node.packagePartName === "string" && node.packagePartName.length > 0
384
456
  ? node.packagePartName
@@ -394,6 +466,17 @@ function normalizeImageNode(
394
466
  packagePartName,
395
467
  ...(node.relationshipId ? { relationshipId: node.relationshipId } : {}),
396
468
  ...(node.altText ? { altText: node.altText } : {}),
469
+ ...(node.widthEmu !== undefined ? { widthEmu: node.widthEmu } : {}),
470
+ ...(node.heightEmu !== undefined ? { heightEmu: node.heightEmu } : {}),
471
+ };
472
+ } else if (
473
+ node.widthEmu !== undefined ||
474
+ node.heightEmu !== undefined
475
+ ) {
476
+ state.media.items[node.mediaId] = {
477
+ ...existingMediaItem,
478
+ ...(node.widthEmu !== undefined ? { widthEmu: node.widthEmu } : {}),
479
+ ...(node.heightEmu !== undefined ? { heightEmu: node.heightEmu } : {}),
397
480
  };
398
481
  }
399
482
 
@@ -521,3 +604,32 @@ function recordOpaqueFragment(
521
604
  warningId,
522
605
  };
523
606
  }
607
+
608
/**
 * Extract text content from field contentXml to populate the field's children
 * array. This enables cross-reference and TOC content to be visible in the
 * canonical model and surface projection.
 *
 * Each non-empty `<w:t>…</w:t>` element becomes one text node. Self-closing
 * `<w:t/>` elements carry no text and are ignored. The five predefined XML
 * entities and numeric character references are decoded in a single pass, so
 * an escaped ampersand followed by entity-like text (e.g. `&amp;lt;`) yields
 * the literal `&lt;` rather than being decoded twice.
 *
 * @param contentXml Raw XML of the field's result runs; may be empty or undefined.
 * @returns Text nodes in document order; an empty array when there is no text.
 */
function normalizeFieldContentXml(contentXml: string | undefined): InlineNode[] {
  if (!contentXml || contentXml.trim().length === 0) {
    return [];
  }

  // Opening tag must be exactly `w:t` (not `w:tab`, `w:tbl`, …) and must not
  // be self-closing; otherwise the lazy body match would swallow markup up to
  // the next `</w:t>`.
  const textPattern = /<w:t(?=[\s>])[^>]*(?<!\/)>([\s\S]*?)<\/w:t>/g;
  const referencePattern = /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g;

  const decodeReference = (reference: string, name: string): string => {
    switch (name) {
      case "amp":
        return "&";
      case "lt":
        return "<";
      case "gt":
        return ">";
      case "quot":
        return '"';
      case "apos":
        return "'";
      default: {
        const codePoint = name.startsWith("#x")
          ? Number.parseInt(name.slice(2), 16)
          : Number.parseInt(name.slice(1), 10);
        // Leave references outside the Unicode range untouched instead of throwing.
        return codePoint > 0 && codePoint <= 0x10ffff
          ? String.fromCodePoint(codePoint)
          : reference;
      }
    }
  };

  const children: InlineNode[] = [];

  for (const match of contentXml.matchAll(textPattern)) {
    const text = (match[1] ?? "").replace(referencePattern, decodeReference);
    if (text.length > 0) {
      children.push({ type: "text", text });
    }
  }

  return children;
}
@@ -0,0 +1,39 @@
1
/**
 * WordprocessingML highlight color names (`w:highlight/@w:val`) mapped to
 * their six-digit hex RGB values, without a leading `#`.
 */
export const HIGHLIGHT_COLOR_MAP = {
  black: "000000",
  blue: "0000FF",
  cyan: "00FFFF",
  darkBlue: "000080",
  darkCyan: "008080",
  darkGray: "808080",
  darkGreen: "008000",
  darkMagenta: "800080",
  // Word palette value; 8B0000 is the CSS `darkred` keyword, not the OOXML color.
  darkRed: "800000",
  darkYellow: "808000",
  green: "00FF00",
  lightGray: "C0C0C0",
  magenta: "FF00FF",
  red: "FF0000",
  white: "FFFFFF",
  yellow: "FFFF00",
} as const;

/** Name of a highlight color present in {@link HIGHLIGHT_COLOR_MAP}. */
export type HighlightColorName = keyof typeof HIGHLIGHT_COLOR_MAP;

/**
 * Narrows an arbitrary string to a known highlight color name.
 *
 * Uses an own-property check so inherited members of `Object.prototype`
 * (`"constructor"`, `"toString"`, `"__proto__"`, …) are never mistaken for
 * color names.
 */
function isHighlightColorName(value: string): value is HighlightColorName {
  return Object.prototype.hasOwnProperty.call(HIGHLIGHT_COLOR_MAP, value);
}

/**
 * Resolves a highlight attribute value to its color name and hex RGB value.
 *
 * @param value Raw highlight value; may be missing.
 * @returns The matching name and hex color, or `undefined` when `value` is
 *   empty, `"none"`, or not a key of {@link HIGHLIGHT_COLOR_MAP}.
 */
export function resolveHighlightColor(
  value: string | null | undefined,
): { color: string; val: HighlightColorName } | undefined {
  if (!value || value === "none") {
    return undefined;
  }

  if (!isHighlightColorName(value)) {
    return undefined;
  }

  return {
    color: HIGHLIGHT_COLOR_MAP[value],
    val: value,
  };
}
@@ -0,0 +1,44 @@
1
+ import type { NumberingCatalog } from "../../model/canonical-document.ts";
2
+
3
/** Id of the synthetic "null" numbering instance. */
export const DOCX_NULL_NUMBERING_INSTANCE_ID = "num:0";

/** Id of the synthetic abstract definition that backs the null numbering instance. */
export const DOCX_NULL_ABSTRACT_NUMBERING_ID = "abstract-num:__docx-import-null__";

/**
 * Builds the catalog fragment for the synthetic "null" numbering entry.
 *
 * The fragment holds one abstract definition with nine levels (0–8), each
 * using format `"none"` and empty level text, and one instance that points at
 * that definition and has no overrides.
 *
 * @returns The `abstractDefinitions` and `instances` maps containing only the
 *   synthetic entries.
 */
export function createSyntheticDocxNullNumberingCatalog(): Pick<
  NumberingCatalog,
  "abstractDefinitions" | "instances"
> {
  const nullAbstractDefinition: NumberingCatalog["abstractDefinitions"][string] = {
    abstractNumberingId: DOCX_NULL_ABSTRACT_NUMBERING_ID,
    levels: Array.from(Array(9).keys(), (level) => ({
      level,
      format: "none",
      text: "",
    })),
  };

  const nullInstance: NumberingCatalog["instances"][string] = {
    numberingInstanceId: DOCX_NULL_NUMBERING_INSTANCE_ID,
    abstractNumberingId: DOCX_NULL_ABSTRACT_NUMBERING_ID,
    overrides: [],
  };

  return {
    abstractDefinitions: { [DOCX_NULL_ABSTRACT_NUMBERING_ID]: nullAbstractDefinition },
    instances: { [DOCX_NULL_NUMBERING_INSTANCE_ID]: nullInstance },
  };
}
30
+
31
/**
 * Tells whether an abstract numbering definition is the synthetic "null"
 * definition, identified solely by its `abstractNumberingId`.
 *
 * @param definition Abstract definition to test.
 * @returns `true` when the id equals `DOCX_NULL_ABSTRACT_NUMBERING_ID`.
 */
export function isSyntheticDocxNullAbstractDefinition(
  definition: NumberingCatalog["abstractDefinitions"][string],
): boolean {
  const { abstractNumberingId } = definition;
  return abstractNumberingId === DOCX_NULL_ABSTRACT_NUMBERING_ID;
}
36
+
37
/**
 * Tells whether a numbering instance is the synthetic "null" instance.
 *
 * Both ids must match: the instance id must be
 * `DOCX_NULL_NUMBERING_INSTANCE_ID` and it must reference
 * `DOCX_NULL_ABSTRACT_NUMBERING_ID`.
 *
 * @param instance Numbering instance to test.
 * @returns `true` only when both ids match the synthetic sentinel values.
 */
export function isSyntheticDocxNullNumberingInstance(
  instance: NumberingCatalog["instances"][string],
): boolean {
  if (instance.numberingInstanceId !== DOCX_NULL_NUMBERING_INSTANCE_ID) {
    return false;
  }
  return instance.abstractNumberingId === DOCX_NULL_ABSTRACT_NUMBERING_ID;
}