@beyondwork/docx-react-component 1.0.72 → 1.0.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/package.json +1 -1
  2. package/src/api/public-types.ts +37 -0
  3. package/src/api/v3/ai/policy.ts +31 -0
  4. package/src/api/v3/ui/chrome-preset-model.ts +6 -0
  5. package/src/api/v3/ui/viewport.ts +1 -1
  6. package/src/core/state/editor-state.ts +49 -6
  7. package/src/io/export/serialize-footnotes.ts +6 -0
  8. package/src/io/export/serialize-headers-footers.ts +6 -0
  9. package/src/io/export/serialize-main-document.ts +7 -0
  10. package/src/io/export/serialize-paragraph-formatting.ts +1 -1
  11. package/src/io/normalize/normalize-text.ts +38 -2
  12. package/src/io/ooxml/parse-headers-footers.ts +31 -0
  13. package/src/io/ooxml/parse-main-document.ts +127 -2
  14. package/src/io/ooxml/parse-paragraph-formatting.ts +1 -1
  15. package/src/runtime/layout/layout-engine-version.ts +22 -1
  16. package/src/runtime/layout/paginated-layout-engine.ts +47 -0
  17. package/src/runtime/scopes/action-validation.ts +30 -4
  18. package/src/runtime/scopes/replacement/apply.ts +1 -0
  19. package/src/runtime/scopes/scope-kinds/paragraph.ts +170 -7
  20. package/src/runtime/scopes/semantic-scope-types.ts +19 -0
  21. package/src/runtime/surface-projection.ts +55 -0
  22. package/src/session/import/loader-types.ts +18 -0
  23. package/src/session/import/loader.ts +2 -0
  24. package/src/ui-tailwind/editor-surface/pm-schema.ts +32 -0
  25. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +6 -0
  26. package/src/ui-tailwind/editor-surface/tw-page-block-view.helpers.ts +77 -0
  27. package/src/ui-tailwind/editor-surface/tw-page-block-view.tsx +12 -4
  28. package/src/ui-tailwind/page-stack/floating-image-overlay-model.ts +49 -32
  29. package/src/ui-tailwind/page-stack/tw-page-footer-band.tsx +5 -1
  30. package/src/ui-tailwind/page-stack/tw-page-header-band.tsx +5 -1
  31. package/src/ui-tailwind/page-stack/tw-region-block-renderer.tsx +71 -7
  32. package/src/ui-tailwind/theme/editor-theme.css +15 -16
  33. package/src/ui-tailwind/tw-review-workspace.tsx +21 -14
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@beyondwork/docx-react-component",
3
3
  "publisher": "beyondwork",
4
- "version": "1.0.72",
4
+ "version": "1.0.73",
5
5
  "description": "Embeddable React Word (docx) editor with review, comments, tracked changes, and round-trip OOXML fidelity.",
6
6
  "type": "module",
7
7
  "sideEffects": [
@@ -1445,6 +1445,34 @@ export type SurfaceBlockSnapshot =
1445
1445
  outlineLevel?: number;
1446
1446
  bidi?: boolean;
1447
1447
  suppressLineNumbers?: boolean;
1448
+ /**
1449
+ * `<w:framePr>` text-frame properties (ECMA-376 §17.3.1.11). Populated
1450
+ * by L03 surface projection from `CanonicalParagraphFormatting.frameProperties`
1451
+ * when present. L04 consumers use the positioning fields to decide
1452
+ * paginated-layout placement (out-of-flow framed paragraph vs. in-flow
1453
+ * block); L11 consumers use them to render `<div>` with absolute or
1454
+ * margin-anchored positioning + wrap behavior. Absent field = normal
1455
+ * in-flow paragraph.
1456
+ *
1457
+ * All sub-fields optional; absence carries the OOXML default.
1458
+ */
1459
+ frameProperties?: {
1460
+ widthTwips?: number;
1461
+ heightTwips?: number;
1462
+ hRule?: "auto" | "atLeast" | "exact";
1463
+ xTwips?: number;
1464
+ yTwips?: number;
1465
+ xAlign?: "left" | "center" | "right" | "inside" | "outside";
1466
+ yAlign?: "top" | "center" | "bottom" | "inside" | "outside" | "inline";
1467
+ hAnchor?: "text" | "margin" | "page";
1468
+ vAnchor?: "text" | "margin" | "page";
1469
+ wrap?: "around" | "auto" | "none" | "notBeside" | "tight" | "through";
1470
+ hSpaceTwips?: number;
1471
+ vSpaceTwips?: number;
1472
+ dropCap?: "none" | "drop" | "margin";
1473
+ lines?: number;
1474
+ anchorLock?: boolean;
1475
+ };
1448
1476
  segments: SurfaceInlineSegment[];
1449
1477
  }
1450
1478
  | {
@@ -5579,6 +5607,15 @@ export interface WordReviewEditorChromeVisibility {
5579
5607
  pageChrome: boolean;
5580
5608
  statusBar: boolean;
5581
5609
  reviewRail: boolean;
5610
+ /**
5611
+ * TwShellHeader (Edit / Review / Workflow / More mode tabs) at the top
5612
+ * of the workspace. Defaults to `true` on every preset EXCEPT
5613
+ * `selection`, which is intended for minimal embeds and should not paint
5614
+ * a workspace chrome header. coord-11 §21 — regressing this default has
5615
+ * history, particularly for visual-fidelity captures that expect a
5616
+ * truly chrome-less `chrome=none` embed.
5617
+ */
5618
+ shellHeader: boolean;
5582
5619
  }
5583
5620
 
5584
5621
  // ---------------------------------------------------------------------------
@@ -38,6 +38,30 @@ export interface GetPolicyInput {
38
38
 
39
39
  export type GetPolicyResult = AIActionPolicy | readonly AIActionPolicy[];
40
40
 
41
+ export type ListAIActionsResult = readonly AIAction[];
42
+
43
+ export const listAIActionsMetadata: ApiV3FnMetadata = {
44
+ name: "ai.listAIActions",
45
+ status: "live-with-adapter",
46
+ sourceLayer: "workflow-review",
47
+ liveEvidence: {
48
+ runnerTest: "test/api/v3/ai/ai-list-actions.test.ts",
49
+ commit: "refactor-09-post-closure-ki-p5",
50
+ },
51
+ uxIntent: { uiVisible: false, expectsUxResponse: "none" },
52
+ agentMetadata: {
53
+ readOrMutate: "read",
54
+ boundedScope: "document",
55
+ auditCategory: "policy-list",
56
+ contextPromptShape:
57
+ "Discovery: returns the AIAction vocabulary with policy entries. Use before calling getPolicy/evaluateAction so ids aren't guessed (closes KI-P5).",
58
+ },
59
+ stateClass: "A-canonical",
60
+ persistsTo: "canonical",
61
+ rwdReference:
62
+ "§AI API § ai.listAIActions. Read-only adapter over AI_ACTION_POLICIES — returns every AIAction id with a shipped policy entry. Closes KI-P5 (AIAction discoverability) by giving agents a runtime-discoverable vocabulary.",
63
+ };
64
+
41
65
  export const getPolicyMetadata: ApiV3FnMetadata = {
42
66
  name: "ai.getPolicy",
43
67
  status: "live-with-adapter",
@@ -62,6 +86,13 @@ export const getPolicyMetadata: ApiV3FnMetadata = {
62
86
 
63
87
  export function createPolicyFamily(_runtime: RuntimeApiHandle) {
64
88
  return {
89
+ listAIActions(): ListAIActionsResult {
90
+ // @endStateApi — live-with-adapter. Projects AI_ACTION_POLICIES[]
91
+ // to the action-id list; every entry is guaranteed policy-backed
92
+ // (getPolicy on these ids returns support != 'unsupported').
93
+ return Object.freeze(AI_ACTION_POLICIES.map((p) => p.action));
94
+ },
95
+
65
96
  getPolicy(input?: GetPolicyInput): GetPolicyResult {
66
97
  // @endStateApi — live-with-adapter. Delegates to Layer-06's
67
98
  // getAIActionPolicy(action) for single-action lookups or returns
@@ -116,6 +116,7 @@ export function resolveChromeVisibilityForPreset(input: {
116
116
  pageChrome: true,
117
117
  statusBar: true,
118
118
  reviewRail: false,
119
+ shellHeader: false,
119
120
  },
120
121
  simple: {
121
122
  toolbar: true,
@@ -126,6 +127,7 @@ export function resolveChromeVisibilityForPreset(input: {
126
127
  pageChrome: true,
127
128
  statusBar: true,
128
129
  reviewRail: false,
130
+ shellHeader: true,
129
131
  },
130
132
  advanced: {
131
133
  toolbar: true,
@@ -136,6 +138,7 @@ export function resolveChromeVisibilityForPreset(input: {
136
138
  pageChrome: true,
137
139
  statusBar: true,
138
140
  reviewRail: true,
141
+ shellHeader: true,
139
142
  },
140
143
  review: {
141
144
  toolbar: true,
@@ -146,6 +149,7 @@ export function resolveChromeVisibilityForPreset(input: {
146
149
  pageChrome: true,
147
150
  statusBar: true,
148
151
  reviewRail: options.showReviewRail,
152
+ shellHeader: true,
149
153
  },
150
154
  workflow: {
151
155
  toolbar: true,
@@ -156,6 +160,7 @@ export function resolveChromeVisibilityForPreset(input: {
156
160
  pageChrome: true,
157
161
  statusBar: true,
158
162
  reviewRail: options.showReviewRail,
163
+ shellHeader: true,
159
164
  },
160
165
  collab: {
161
166
  toolbar: true,
@@ -166,6 +171,7 @@ export function resolveChromeVisibilityForPreset(input: {
166
171
  pageChrome: true,
167
172
  statusBar: true,
168
173
  reviewRail: options.showReviewRail,
174
+ shellHeader: true,
169
175
  },
170
176
  };
171
177
 
@@ -112,7 +112,7 @@ export const scrollToPageMetadata: ApiV3FnMetadata = {
112
112
  stateClass: "C-local",
113
113
  persistsTo: "none",
114
114
  rwdReference:
115
- "§UI API § ui.viewport.scrollToPage. Resolves pageNumber → scrollY via handle.geometry.getPage(pageIndex); dispatches through controller.dispatchScroll({ kind:'page', value, behavior }); returns the settled {actualPage, scrollY}. 1-based page numbers; clamps to [1, pageCount]. First-class API for visual-fidelity harness + 'Go to page N' UX — replaces DOM-scrape fallback (coord-10 §γ). When L07 ships runtime.viewport.getPageAnchor / getPageGeometry (coord-07 §2.9), this wrapper may be simplified to delegate to those primitives; the public shape stays stable.",
115
+ "§UI API § ui.viewport.scrollToPage. Resolves pageNumber → scrollY via handle.geometry.getPage(pageIndex); dispatches through controller.dispatchScroll({ kind:'page', value, behavior }); returns the settled {actualPage, scrollY}. 1-based page numbers; clamps to [1, pageCount]. First-class API for visual-fidelity harness + 'Go to page N' UX — replaces DOM-scrape fallback (coord-10 §γ). Parity note: reads the same `handle.geometry.getPage(i).frame.topPx` source as `runtime.viewport.getPageAnchor` (L07 coord-07 §2.9, shipped 2026-04-24 in `src/api/v3/runtime/viewport.ts`), so `actualPage + scrollY` here and `{scrollY, pageRect}` on the runtime side stay consistent by construction. No direct delegation today because `scripts/ci-check-ui-api-layer-purity.mjs` restricts `src/api/v3/ui/**` from importing `src/api/v3/runtime/**`; both surfaces are thin wrappers over the shared geometry facet.",
116
116
  };
117
117
 
118
118
  // ----- X5 markup-mode metadata (state-classes cross-cutting Slice X5) -----
@@ -582,14 +582,57 @@ export function createPersistedEditorSnapshot(
582
582
  }
583
583
 
584
584
  function estimateParagraphCount(content: unknown): number {
585
- if (Array.isArray(content)) {
586
- return content.length;
587
- }
585
+ // Canonical shape: `{type:"doc", children: BlockNode[]}`. Older
586
+ // shapes (array / `.blocks`) handled for persistence-snapshot
587
+ // fallback. KI-P4 (2026-04-23): pre-fix the array + .blocks
588
+ // branches never matched the current envelope, so the fallback
589
+ // returned 1 on any non-empty document regardless of paragraph
590
+ // count. Fix counts ParagraphNode entries recursively, descending
591
+ // into table cells + SDT / customXml blocks so nested paragraphs
592
+ // contribute to the total.
593
+ let count = 0;
594
+ const walk = (node: unknown): void => {
595
+ if (!node || typeof node !== "object") return;
596
+ const typed = node as { type?: unknown };
597
+ if (typed.type === "paragraph") {
598
+ count += 1;
599
+ return;
600
+ }
601
+ if (typed.type === "table") {
602
+ const rows = (node as { rows?: unknown[] }).rows;
603
+ if (Array.isArray(rows)) {
604
+ for (const row of rows) {
605
+ const cells = (row as { cells?: unknown[] }).cells;
606
+ if (Array.isArray(cells)) {
607
+ for (const cell of cells) {
608
+ const children = (cell as { children?: unknown[] }).children;
609
+ if (Array.isArray(children)) children.forEach(walk);
610
+ }
611
+ }
612
+ }
613
+ }
614
+ return;
615
+ }
616
+ const children = (node as { children?: unknown[] }).children;
617
+ if (Array.isArray(children)) children.forEach(walk);
618
+ };
588
619
 
589
- if (content && typeof content === "object" && Array.isArray((content as { blocks?: unknown[] }).blocks)) {
590
- return ((content as { blocks: unknown[] }).blocks).length;
620
+ if (content && typeof content === "object") {
621
+ const children = (content as { children?: unknown[] }).children;
622
+ if (Array.isArray(children)) {
623
+ children.forEach(walk);
624
+ return count;
625
+ }
626
+ const blocks = (content as { blocks?: unknown[] }).blocks;
627
+ if (Array.isArray(blocks)) {
628
+ blocks.forEach(walk);
629
+ return count;
630
+ }
631
+ }
632
+ if (Array.isArray(content)) {
633
+ content.forEach(walk);
634
+ return count;
591
635
  }
592
-
593
636
  return extractText(content).length > 0 ? 1 : 0;
594
637
  }
595
638
 
@@ -18,6 +18,7 @@ import {
18
18
  } from "./table-properties-xml.ts";
19
19
  import { twip } from "./twip.ts";
20
20
  import { escapeXmlAttribute } from "./escape-xml-attribute.ts";
21
+ import { buildFrameXml } from "./serialize-paragraph-formatting.ts";
21
22
 
22
23
  export const WORD_FOOTNOTES_CONTENT_TYPE =
23
24
  "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml";
@@ -222,6 +223,11 @@ function buildParagraphPropertiesXml(paragraph: ParagraphNode): string {
222
223
  if (paragraph.styleId) {
223
224
  parts.push(`<w:pStyle w:val="${escapeXmlAttribute(paragraph.styleId)}"/>`);
224
225
  }
226
+ // Coord-04 §1.19.d — direct-paragraph framePr (footnotes path).
227
+ {
228
+ const frameXml = buildFrameXml(paragraph.frameProperties);
229
+ if (frameXml) parts.push(frameXml);
230
+ }
225
231
  if (paragraph.alignment) {
226
232
  parts.push(`<w:jc w:val="${escapeXmlAttribute(paragraph.alignment)}"/>`);
227
233
  }
@@ -18,6 +18,7 @@ import {
18
18
  } from "./table-properties-xml.ts";
19
19
  import { twip } from "./twip.ts";
20
20
  import { escapeXmlAttribute } from "./escape-xml-attribute.ts";
21
+ import { buildFrameXml } from "./serialize-paragraph-formatting.ts";
21
22
 
22
23
  export const WORD_HEADER_CONTENT_TYPE =
23
24
  "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
@@ -186,6 +187,11 @@ function buildParagraphPropertiesXml(paragraph: ParagraphNode): string {
186
187
  if (paragraph.styleId) {
187
188
  parts.push(`<w:pStyle w:val="${escapeXmlAttribute(paragraph.styleId)}"/>`);
188
189
  }
190
+ // Coord-04 §1.19.d — direct-paragraph framePr (headers/footers path).
191
+ {
192
+ const frameXml = buildFrameXml(paragraph.frameProperties);
193
+ if (frameXml) parts.push(frameXml);
194
+ }
189
195
  if (paragraph.spacing) {
190
196
  const s = paragraph.spacing;
191
197
  const attrs: string[] = [];
@@ -22,6 +22,7 @@ import { SCOPE_MARKER_BOOKMARK_PREFIX } from "../ooxml/parse-scope-markers.ts";
22
22
  import { getOpaqueFragment } from "../../preservation/store.ts";
23
23
  import { retainRelationshipsForFragment } from "../../preservation/relationship-retention.ts";
24
24
  import { serializeParagraphNumberingProperties } from "./serialize-numbering.ts";
25
+ import { buildFrameXml } from "./serialize-paragraph-formatting.ts";
25
26
  import {
26
27
  serializeTableCellPropertiesXml,
27
28
  serializeTablePropertiesXml,
@@ -716,6 +717,12 @@ function buildParagraphPropertiesXml(paragraph: ParagraphNode): string {
716
717
  pushOnOffParagraphProperty(children, "keepNext", paragraph.keepNext);
717
718
  pushOnOffParagraphProperty(children, "keepLines", paragraph.keepLines);
718
719
  pushOnOffParagraphProperty(children, "pageBreakBefore", paragraph.pageBreakBefore);
720
+ // ECMA-376 §17.3.1 canonical slot for framePr: between pageBreakBefore
721
+ // and pBdr. Coord-04 §1.19.d — direct-paragraph path.
722
+ {
723
+ const frameXml = buildFrameXml(paragraph.frameProperties);
724
+ if (frameXml) children.push(frameXml);
725
+ }
719
726
  pushOnOffParagraphProperty(children, "widowControl", paragraph.widowControl);
720
727
  if (paragraph.outlineLevel !== undefined) {
721
728
  children.push(`<w:outlineLvl w:val="${paragraph.outlineLevel}"/>`);
@@ -93,7 +93,7 @@ function buildSpacingXml(s: ParagraphSpacing | undefined): string {
93
93
  return attrs.length > 0 ? `<w:spacing ${attrs.join(" ")}/>` : "";
94
94
  }
95
95
 
96
- function buildFrameXml(f: FrameProperties | undefined): string {
96
+ export function buildFrameXml(f: FrameProperties | undefined): string {
97
97
  if (!f) return "";
98
98
  // Prefer parsed rawXml when available — preserves extension attributes
99
99
  // (`w14:*`, `w15:*`, `mc:Ignorable`) that the typed field set doesn't
@@ -264,6 +264,7 @@ function normalizeParagraph(
264
264
  ...(paragraph.suppressLineNumbers !== undefined
265
265
  ? { suppressLineNumbers: paragraph.suppressLineNumbers }
266
266
  : {}),
267
+ ...(paragraph.frameProperties ? { frameProperties: paragraph.frameProperties } : {}),
267
268
  // A.7: preserve w14:paraId / w14:textId across import → export so
268
269
  // downstream tools that diff documents by paragraph id stay stable.
269
270
  ...(paragraph.wordExtensionIds
@@ -715,9 +716,30 @@ function registerComplexPreviewMedia(
715
716
  function normalizeHyperlink(node: ParsedHyperlinkNode): {
716
717
  type: "hyperlink";
717
718
  href: string;
718
- children: Array<TextNode | { type: "hard_break" } | { type: "tab" }>;
719
+ children: Array<
720
+ | TextNode
721
+ | { type: "hard_break" }
722
+ | { type: "column_break" }
723
+ | { type: "page_break" }
724
+ | { type: "tab" }
725
+ | { type: "symbol"; char: string; font?: string; marks?: TextMark[] }
726
+ >;
719
727
  } {
720
- const children: Array<TextNode | { type: "hard_break" } | { type: "tab" }> = [];
728
+ // Canonical `HyperlinkNode.children` accepts the full inline-leaf set
729
+ // (TextNode | HardBreakNode | ColumnBreakNode | PageBreakNode | TabNode |
730
+ // SymbolNode). Matching the canonical shape here keeps rare
731
+ // hyperlink-inside-break patterns (a link spanning a column or page
732
+ // break in Word's output) from silently dropping at the normalize step —
733
+ // same class of drop that `coord-04 §1.19.b` fixed one level up in
734
+ // `normalizeInlineChildren`.
735
+ const children: Array<
736
+ | TextNode
737
+ | { type: "hard_break" }
738
+ | { type: "column_break" }
739
+ | { type: "page_break" }
740
+ | { type: "tab" }
741
+ | { type: "symbol"; char: string; font?: string; marks?: TextMark[] }
742
+ > = [];
721
743
 
722
744
  for (const child of node.children) {
723
745
  switch (child.type) {
@@ -743,6 +765,20 @@ function normalizeHyperlink(node: ParsedHyperlinkNode): {
743
765
  case "hard_break":
744
766
  children.push({ type: "hard_break" });
745
767
  break;
768
+ case "column_break":
769
+ children.push({ type: "column_break" });
770
+ break;
771
+ case "page_break":
772
+ children.push({ type: "page_break" });
773
+ break;
774
+ case "symbol":
775
+ children.push({
776
+ type: "symbol",
777
+ char: child.char,
778
+ ...(child.font ? { font: child.font } : {}),
779
+ ...(child.marks && child.marks.length > 0 ? { marks: child.marks } : {}),
780
+ });
781
+ break;
746
782
  }
747
783
  }
748
784
 
@@ -328,6 +328,37 @@ function parseParagraphElement(
328
328
  activeComplexField = null;
329
329
  }
330
330
  pushFieldNode(children, child, "simple");
331
+ } else if (name === "sdt") {
332
+ // coord-11 §22 — structured-document-tag wrapping run-level content
333
+ // inside a header/footer paragraph. Word commonly uses these to
334
+ // bundle the page-number field + decorative drawings (e.g. CCEP's
335
+ // footer "Copyright CCEP STRICTLY CONFIDENTIAL" red rectangle +
336
+ // "Page N" label both sit inside one `<w:sdt>` in footer1.xml).
337
+ // Without this case the sdt was silently dropped at the paragraph
338
+ // walker and every run it carried — including WPS shapes bearing
339
+ // the brand-strip text — never reached the canonical tree.
340
+ // Treat `<w:sdtContent>` as a transparent wrapper and re-process
341
+ // its `<w:r>` / `<w:hyperlink>` / `<w:sdt>` children as if they
342
+ // were direct paragraph children.
343
+ const sdtContent = findChildElementOptional(child, "sdtContent");
344
+ if (sdtContent) {
345
+ for (const grandchild of sdtContent.children) {
346
+ if (grandchild.type !== "element") continue;
347
+ const gname = localName(grandchild.name);
348
+ if (gname === "r") {
349
+ activeComplexField = appendRunNodes(grandchild, children, activeComplexField, sourceXml, opts);
350
+ } else if (gname === "hyperlink") {
351
+ children.push(parseHyperlinkElement(grandchild, opts));
352
+ } else if (gname === "bookmarkStart" || gname === "bookmarkEnd") {
353
+ children.push(parseBookmarkElement(grandchild));
354
+ } else if (gname === "fldSimple") {
355
+ pushFieldNode(children, grandchild, "simple");
356
+ }
357
+ // Nested sdt / other elements ignored — deeper nesting is rare
358
+ // enough that opaque round-trip via the block-level sdt parser
359
+ // handles it if it matters.
360
+ }
361
+ }
331
362
  }
332
363
  }
333
364
 
@@ -1,6 +1,7 @@
1
1
  import type {
2
2
  BorderSpec,
3
3
  CellShading,
4
+ FrameProperties,
4
5
  TextMark,
5
6
  ParagraphBorders,
6
7
  ParagraphShading,
@@ -39,6 +40,7 @@ import { parseComplexContentXml, type ChartPartLookup } from "./parse-complex-co
39
40
  import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
40
41
  import { parseObject } from "./parse-object.ts";
41
42
  import { parseDrawingFrame } from "./parse-drawing.ts";
43
+ import { readFrameProperties } from "./parse-paragraph-formatting.ts";
42
44
  import { classifyFieldInstruction } from "./parse-fields.ts";
43
45
  import { parseFFDataFromFldChar } from "./parse-ffdata.ts";
44
46
  import { resolveHighlightColor } from "./highlight-colors.ts";
@@ -217,6 +219,41 @@ function captureGrabBagFromContainer(
217
219
  export interface ParsedMainDocument {
218
220
  blocks: ParsedBlockNode[];
219
221
  finalSectionProperties?: SectionProperties;
222
+ /**
223
+ * Aggregate count of cosmetic markers stripped during parse (see
224
+ * {@link ParseMainDocumentOptions.stripCosmeticMarkers}). Keyed by
225
+ * local element name (e.g. `lastRenderedPageBreak`). Absent when no
226
+ * markers were stripped.
227
+ */
228
+ skippedCosmeticMarkerCounts?: Readonly<Record<string, number>>;
229
+ }
230
+
231
+ /**
232
+ * Cosmetic markers that Word re-inserts on reopen and that carry no
233
+ * contract semantics. Stripping them at parse time unblocks
234
+ * `replaceText` on ranges that today cross them as `opaque_inline`
235
+ * boundaries. See `docs/architecture/cosmetic-marker-strip.md`.
236
+ *
237
+ * This is the Phase 1 set. Bookmark-pair stripping (with reference
238
+ * scan) is Phase 2.
239
+ */
240
+ export const COSMETIC_MARKER_ELEMENT_NAMES: ReadonlySet<string> = new Set([
241
+ "lastRenderedPageBreak",
242
+ "proofErr",
243
+ "noBreakHyphen",
244
+ ]);
245
+
246
+ export interface ParseMainDocumentOptions {
247
+ /**
248
+ * When `true` (the default), drops `<w:lastRenderedPageBreak/>`,
249
+ * `<w:proofErr/>`, and `<w:noBreakHyphen/>` during the parse walk
250
+ * instead of emitting them as `opaque_inline` nodes. Counts are
251
+ * reported on {@link ParsedMainDocument.skippedCosmeticMarkerCounts}.
252
+ *
253
+ * Set to `false` to preserve the pre-strip behavior exactly — every
254
+ * cosmetic marker becomes an `opaque_inline` with its source XML.
255
+ */
256
+ stripCosmeticMarkers?: boolean;
220
257
  }
221
258
 
222
259
  export type ParsedBlockNode =
@@ -256,6 +293,15 @@ export interface ParsedParagraphNode {
256
293
  bidi?: boolean;
257
294
  suppressLineNumbers?: boolean;
258
295
  cnfStyle?: string;
296
+ /**
297
+ * `<w:framePr>` declared directly on the paragraph's own `<w:pPr>`.
298
+ * Coord-04 §1.19.d step 2 (inline path). The style-cascade path
299
+ * flows through `CanonicalParagraphFormatting.frameProperties` on
300
+ * the style side; this slot captures the direct-override path so
301
+ * L02 `ParagraphNode.frameProperties` (added 2026-04-24 `4b3ea0b2`)
302
+ * can reach its canonical shape.
303
+ */
304
+ frameProperties?: FrameProperties;
259
305
  /** A.7: preserved w14 extension ids (paraId/textId). */
260
306
  wordExtensionIds?: {
261
307
  paraId?: string;
@@ -656,24 +702,61 @@ export function setActiveParseTelemetryBus(bus: ParseTelemetryBus | undefined):
656
702
  activeParseTelemetryBus = bus;
657
703
  }
658
704
 
705
+ /**
706
+ * Request-scoped cosmetic-marker strip context. Set by
707
+ * `parseMainDocumentXml` for the duration of a single parse; read at
708
+ * the four emission sites in `parseBodyChild` / `parseRun` /
709
+ * `parseRunContentOnly` / `parseRevisionContainer`. Using a module
710
+ * variable instead of threading the flag through ~15 intermediate
711
+ * function signatures keeps the call sites readable; the try/finally
712
+ * in the entry point ensures the variable never leaks across calls.
713
+ *
714
+ * Re-entrancy invariant matches `activeChartPartLookup` above.
715
+ */
716
+ interface CosmeticStripContext {
717
+ readonly strip: boolean;
718
+ readonly counts: Record<string, number>;
719
+ }
720
+ let activeCosmeticStripContext: CosmeticStripContext | null = null;
721
+
722
+ function noteStrippedCosmeticMarker(tag: string): void {
723
+ if (!activeCosmeticStripContext) return;
724
+ activeCosmeticStripContext.counts[tag] =
725
+ (activeCosmeticStripContext.counts[tag] ?? 0) + 1;
726
+ }
727
+
728
+ function shouldStripCosmeticMarker(): boolean {
729
+ return activeCosmeticStripContext?.strip === true;
730
+ }
731
+
659
732
  export function parseMainDocumentXml(
660
733
  xml: string,
661
734
  relationships: readonly OpcRelationship[] = [],
662
735
  mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
663
736
  sourcePartPath = "/word/document.xml",
664
737
  chartPartLookup?: ChartPartLookup,
738
+ parseOptions: ParseMainDocumentOptions = {},
665
739
  ): ParsedMainDocument {
666
740
  activeChartPartLookup = chartPartLookup;
741
+ const stripContext: CosmeticStripContext = {
742
+ strip: parseOptions.stripCosmeticMarkers !== false,
743
+ counts: Object.create(null) as Record<string, number>,
744
+ };
745
+ activeCosmeticStripContext = stripContext;
667
746
  const bus = activeParseTelemetryBus;
668
747
  const started = bus?.isEnabled("parse") ? performanceNow() : 0;
669
748
  try {
670
749
  const result = parseMainDocumentXmlInner(xml, relationships, mediaParts, sourcePartPath);
750
+ if (Object.keys(stripContext.counts).length > 0) {
751
+ result.skippedCosmeticMarkerCounts = Object.freeze({ ...stripContext.counts });
752
+ }
671
753
  if (bus?.isEnabled("parse")) {
672
754
  emitParseSummary(bus, result, sourcePartPath, performanceNow() - started);
673
755
  }
674
756
  return result;
675
757
  } finally {
676
758
  activeChartPartLookup = undefined;
759
+ activeCosmeticStripContext = null;
677
760
  }
678
761
  }
679
762
 
@@ -704,6 +787,13 @@ function emitParseSummary(
704
787
  blockCount: result.blocks.length,
705
788
  blockKindCounts: counts,
706
789
  ms,
790
+ // Strip counts are surfaced here (telemetry-only) rather than as a
791
+ // warning on `diagnostics.warnings` — the markers carry no
792
+ // contract semantics and surfacing them in the user-visible
793
+ // warnings feed would be noise. Available to debug UX / tests via
794
+ // the `parse` channel; absent when the feature is disabled or no
795
+ // markers were stripped.
796
+ skippedCosmeticMarkerCounts: result.skippedCosmeticMarkerCounts,
707
797
  },
708
798
  });
709
799
  }
@@ -1004,6 +1094,7 @@ function parseBodyChild(
1004
1094
  let bidi: ParsedParagraphNode["bidi"];
1005
1095
  let suppressLineNumbers: ParsedParagraphNode["suppressLineNumbers"];
1006
1096
  let cnfStyle: ParsedParagraphNode["cnfStyle"];
1097
+ let frameProperties: ParsedParagraphNode["frameProperties"];
1007
1098
  let sectionProperties: SectionProperties | undefined;
1008
1099
  let sectionPropertiesXml: string | undefined;
1009
1100
  let paragraphSupported = true;
@@ -1050,6 +1141,12 @@ function parseBodyChild(
1050
1141
  bidi = readOnOffParagraphProperty(child, "bidi");
1051
1142
  suppressLineNumbers = readOnOffParagraphProperty(child, "suppressLineNumbers");
1052
1143
  cnfStyle = readParagraphCnfStyle(child);
1144
+ {
1145
+ const framePrNode = child.children.find(
1146
+ (c): c is XmlElementNode => c.type === "element" && localName(c.name) === "framePr",
1147
+ );
1148
+ if (framePrNode) frameProperties = readFrameProperties(framePrNode);
1149
+ }
1053
1150
  sectionProperties = readSectionPropertiesFromPPr(child);
1054
1151
  sectionPropertiesXml = readSectionPropertiesXmlFromPPr(child, sourceXml);
1055
1152
  paragraphSupported = paragraphSupported && supportsParagraphProperties(child);
@@ -1148,6 +1245,10 @@ function parseBodyChild(
1148
1245
  flushActiveComplexField(children, () => {
1149
1246
  activeComplexField = null;
1150
1247
  }, activeComplexField);
1248
+ if (shouldStripCosmeticMarker()) {
1249
+ noteStrippedCosmeticMarker("proofErr");
1250
+ break;
1251
+ }
1151
1252
  children.push({
1152
1253
  type: "opaque_inline",
1153
1254
  rawXml: sourceXml.slice(child.start, child.end),
@@ -1235,6 +1336,7 @@ function parseBodyChild(
1235
1336
  ...(bidi !== undefined ? { bidi } : {}),
1236
1337
  ...(suppressLineNumbers !== undefined ? { suppressLineNumbers } : {}),
1237
1338
  ...(cnfStyle ? { cnfStyle } : {}),
1339
+ ...(frameProperties ? { frameProperties } : {}),
1238
1340
  ...(wordExtensionIds ? { wordExtensionIds } : {}),
1239
1341
  ...(sectionProperties ? { sectionProperties } : {}),
1240
1342
  ...(sectionPropertiesXml ? { sectionPropertiesXml } : {}),
@@ -2584,6 +2686,11 @@ function parseRun(
2584
2686
  }
2585
2687
  case "lastRenderedPageBreak":
2586
2688
  case "proofErr":
2689
+ case "noBreakHyphen":
2690
+ if (shouldStripCosmeticMarker()) {
2691
+ noteStrippedCosmeticMarker(localName(child.name));
2692
+ break;
2693
+ }
2587
2694
  result.push({
2588
2695
  type: "opaque_inline",
2589
2696
  rawXml: sourceXml.slice(child.start, child.end),
@@ -2657,12 +2764,23 @@ function parseRevisionContainer(
2657
2764
  result.push(hyperlink);
2658
2765
  break;
2659
2766
  }
2767
+ case "proofErr":
2768
+ case "lastRenderedPageBreak":
2769
+ case "noBreakHyphen":
2770
+ if (shouldStripCosmeticMarker()) {
2771
+ noteStrippedCosmeticMarker(localName(child.name));
2772
+ break;
2773
+ }
2774
+ return [
2775
+ {
2776
+ type: "opaque_inline",
2777
+ rawXml: sourceXml.slice(node.start, node.end),
2778
+ },
2779
+ ];
2660
2780
  case "commentRangeStart":
2661
2781
  case "commentRangeEnd":
2662
2782
  case "bookmarkStart":
2663
2783
  case "bookmarkEnd":
2664
- case "proofErr":
2665
- case "lastRenderedPageBreak":
2666
2784
  return [
2667
2785
  {
2668
2786
  type: "opaque_inline",
@@ -2835,10 +2953,17 @@ function parseRunContentOnly(
2835
2953
  case "commentReference":
2836
2954
  case "lastRenderedPageBreak":
2837
2955
  case "proofErr":
2956
+ case "noBreakHyphen": {
2957
+ const tag = localName(child.name);
2958
+ if (shouldStripCosmeticMarker() && tag !== "commentReference") {
2959
+ noteStrippedCosmeticMarker(tag);
2960
+ break;
2961
+ }
2838
2962
  if (options.preserveUnsupportedReviewMarkup) {
2839
2963
  return { nodes: [], supported: false };
2840
2964
  }
2841
2965
  break;
2966
+ }
2842
2967
  default:
2843
2968
  return { nodes: [], supported: false };
2844
2969
  }
@@ -204,7 +204,7 @@ function readShading(node: XmlElementNode): ParagraphShading | undefined {
204
204
  * The typed attributes cover the CCEP cases we've seen (2-column inset
205
205
  * text frames, drop-caps); extension attrs are rare in that corpus.
206
206
  */
207
- function readFrameProperties(node: XmlElementNode): FrameProperties | undefined {
207
+ export function readFrameProperties(node: XmlElementNode): FrameProperties | undefined {
208
208
  const out: FrameProperties = {};
209
209
  const width = readIntAttr(node, "w:w");
210
210
  if (width !== undefined) out.widthTwips = width;