@beyondwork/docx-react-component 1.0.72 → 1.0.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/public-types.ts +37 -0
- package/src/api/v3/ai/policy.ts +31 -0
- package/src/api/v3/ui/chrome-preset-model.ts +6 -0
- package/src/api/v3/ui/viewport.ts +1 -1
- package/src/core/state/editor-state.ts +49 -6
- package/src/io/export/serialize-footnotes.ts +6 -0
- package/src/io/export/serialize-headers-footers.ts +6 -0
- package/src/io/export/serialize-main-document.ts +7 -0
- package/src/io/export/serialize-paragraph-formatting.ts +1 -1
- package/src/io/normalize/normalize-text.ts +38 -2
- package/src/io/ooxml/parse-headers-footers.ts +31 -0
- package/src/io/ooxml/parse-main-document.ts +127 -2
- package/src/io/ooxml/parse-paragraph-formatting.ts +1 -1
- package/src/runtime/layout/layout-engine-version.ts +22 -1
- package/src/runtime/layout/paginated-layout-engine.ts +47 -0
- package/src/runtime/scopes/action-validation.ts +30 -4
- package/src/runtime/scopes/replacement/apply.ts +1 -0
- package/src/runtime/scopes/scope-kinds/paragraph.ts +170 -7
- package/src/runtime/scopes/semantic-scope-types.ts +19 -0
- package/src/runtime/surface-projection.ts +55 -0
- package/src/session/import/loader-types.ts +18 -0
- package/src/session/import/loader.ts +2 -0
- package/src/ui-tailwind/editor-surface/pm-schema.ts +32 -0
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +6 -0
- package/src/ui-tailwind/editor-surface/tw-page-block-view.helpers.ts +77 -0
- package/src/ui-tailwind/editor-surface/tw-page-block-view.tsx +12 -4
- package/src/ui-tailwind/page-stack/floating-image-overlay-model.ts +49 -32
- package/src/ui-tailwind/page-stack/tw-page-footer-band.tsx +5 -1
- package/src/ui-tailwind/page-stack/tw-page-header-band.tsx +5 -1
- package/src/ui-tailwind/page-stack/tw-region-block-renderer.tsx +71 -7
- package/src/ui-tailwind/theme/editor-theme.css +15 -16
- package/src/ui-tailwind/tw-review-workspace.tsx +21 -14
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@beyondwork/docx-react-component",
|
|
3
3
|
"publisher": "beyondwork",
|
|
4
|
-
"version": "1.0.72",
|
|
4
|
+
"version": "1.0.73",
|
|
5
5
|
"description": "Embeddable React Word (docx) editor with review, comments, tracked changes, and round-trip OOXML fidelity.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"sideEffects": [
|
package/src/api/public-types.ts
CHANGED
|
@@ -1445,6 +1445,34 @@ export type SurfaceBlockSnapshot =
|
|
|
1445
1445
|
outlineLevel?: number;
|
|
1446
1446
|
bidi?: boolean;
|
|
1447
1447
|
suppressLineNumbers?: boolean;
|
|
1448
|
+
/**
|
|
1449
|
+
* `<w:framePr>` text-frame properties (ECMA-376 §17.3.1.11). Populated
|
|
1450
|
+
* by L03 surface projection from `CanonicalParagraphFormatting.frameProperties`
|
|
1451
|
+
* when present. L04 consumers use the positioning fields to decide
|
|
1452
|
+
* paginated-layout placement (out-of-flow framed paragraph vs. in-flow
|
|
1453
|
+
* block); L11 consumers use them to render `<div>` with absolute or
|
|
1454
|
+
* margin-anchored positioning + wrap behavior. Absent field = normal
|
|
1455
|
+
* in-flow paragraph.
|
|
1456
|
+
*
|
|
1457
|
+
* All sub-fields optional; absence carries the OOXML default.
|
|
1458
|
+
*/
|
|
1459
|
+
frameProperties?: {
|
|
1460
|
+
widthTwips?: number;
|
|
1461
|
+
heightTwips?: number;
|
|
1462
|
+
hRule?: "auto" | "atLeast" | "exact";
|
|
1463
|
+
xTwips?: number;
|
|
1464
|
+
yTwips?: number;
|
|
1465
|
+
xAlign?: "left" | "center" | "right" | "inside" | "outside";
|
|
1466
|
+
yAlign?: "top" | "center" | "bottom" | "inside" | "outside" | "inline";
|
|
1467
|
+
hAnchor?: "text" | "margin" | "page";
|
|
1468
|
+
vAnchor?: "text" | "margin" | "page";
|
|
1469
|
+
wrap?: "around" | "auto" | "none" | "notBeside" | "tight" | "through";
|
|
1470
|
+
hSpaceTwips?: number;
|
|
1471
|
+
vSpaceTwips?: number;
|
|
1472
|
+
dropCap?: "none" | "drop" | "margin";
|
|
1473
|
+
lines?: number;
|
|
1474
|
+
anchorLock?: boolean;
|
|
1475
|
+
};
|
|
1448
1476
|
segments: SurfaceInlineSegment[];
|
|
1449
1477
|
}
|
|
1450
1478
|
| {
|
|
@@ -5579,6 +5607,15 @@ export interface WordReviewEditorChromeVisibility {
|
|
|
5579
5607
|
pageChrome: boolean;
|
|
5580
5608
|
statusBar: boolean;
|
|
5581
5609
|
reviewRail: boolean;
|
|
5610
|
+
/**
|
|
5611
|
+
* TwShellHeader (Edit / Review / Workflow / More mode tabs) at the top
|
|
5612
|
+
* of the workspace. Defaults to `true` on every preset EXCEPT
|
|
5613
|
+
* `selection`, which is intended for minimal embeds and should not paint
|
|
5614
|
+
* a workspace chrome header. coord-11 §21 — regressing this default has
|
|
5615
|
+
* history, particularly for visual-fidelity captures that expect a
|
|
5616
|
+
* truly chrome-less `chrome=none` embed.
|
|
5617
|
+
*/
|
|
5618
|
+
shellHeader: boolean;
|
|
5582
5619
|
}
|
|
5583
5620
|
|
|
5584
5621
|
// ---------------------------------------------------------------------------
|
package/src/api/v3/ai/policy.ts
CHANGED
|
@@ -38,6 +38,30 @@ export interface GetPolicyInput {
|
|
|
38
38
|
|
|
39
39
|
export type GetPolicyResult = AIActionPolicy | readonly AIActionPolicy[];
|
|
40
40
|
|
|
41
|
+
export type ListAIActionsResult = readonly AIAction[];
|
|
42
|
+
|
|
43
|
+
export const listAIActionsMetadata: ApiV3FnMetadata = {
|
|
44
|
+
name: "ai.listAIActions",
|
|
45
|
+
status: "live-with-adapter",
|
|
46
|
+
sourceLayer: "workflow-review",
|
|
47
|
+
liveEvidence: {
|
|
48
|
+
runnerTest: "test/api/v3/ai/ai-list-actions.test.ts",
|
|
49
|
+
commit: "refactor-09-post-closure-ki-p5",
|
|
50
|
+
},
|
|
51
|
+
uxIntent: { uiVisible: false, expectsUxResponse: "none" },
|
|
52
|
+
agentMetadata: {
|
|
53
|
+
readOrMutate: "read",
|
|
54
|
+
boundedScope: "document",
|
|
55
|
+
auditCategory: "policy-list",
|
|
56
|
+
contextPromptShape:
|
|
57
|
+
"Discovery: returns the AIAction vocabulary with policy entries. Use before calling getPolicy/evaluateAction so ids aren't guessed (closes KI-P5).",
|
|
58
|
+
},
|
|
59
|
+
stateClass: "A-canonical",
|
|
60
|
+
persistsTo: "canonical",
|
|
61
|
+
rwdReference:
|
|
62
|
+
"§AI API § ai.listAIActions. Read-only adapter over AI_ACTION_POLICIES — returns every AIAction id with a shipped policy entry. Closes KI-P5 (AIAction discoverability) by giving agents a runtime-discoverable vocabulary.",
|
|
63
|
+
};
|
|
64
|
+
|
|
41
65
|
export const getPolicyMetadata: ApiV3FnMetadata = {
|
|
42
66
|
name: "ai.getPolicy",
|
|
43
67
|
status: "live-with-adapter",
|
|
@@ -62,6 +86,13 @@ export const getPolicyMetadata: ApiV3FnMetadata = {
|
|
|
62
86
|
|
|
63
87
|
export function createPolicyFamily(_runtime: RuntimeApiHandle) {
|
|
64
88
|
return {
|
|
89
|
+
listAIActions(): ListAIActionsResult {
|
|
90
|
+
// @endStateApi — live-with-adapter. Projects AI_ACTION_POLICIES[]
|
|
91
|
+
// to the action-id list; every entry is guaranteed policy-backed
|
|
92
|
+
// (getPolicy on these ids returns support != 'unsupported').
|
|
93
|
+
return Object.freeze(AI_ACTION_POLICIES.map((p) => p.action));
|
|
94
|
+
},
|
|
95
|
+
|
|
65
96
|
getPolicy(input?: GetPolicyInput): GetPolicyResult {
|
|
66
97
|
// @endStateApi — live-with-adapter. Delegates to Layer-06's
|
|
67
98
|
// getAIActionPolicy(action) for single-action lookups or returns
|
|
package/src/api/v3/ui/chrome-preset-model.ts
CHANGED
|
@@ -116,6 +116,7 @@ export function resolveChromeVisibilityForPreset(input: {
|
|
|
116
116
|
pageChrome: true,
|
|
117
117
|
statusBar: true,
|
|
118
118
|
reviewRail: false,
|
|
119
|
+
shellHeader: false,
|
|
119
120
|
},
|
|
120
121
|
simple: {
|
|
121
122
|
toolbar: true,
|
|
@@ -126,6 +127,7 @@ export function resolveChromeVisibilityForPreset(input: {
|
|
|
126
127
|
pageChrome: true,
|
|
127
128
|
statusBar: true,
|
|
128
129
|
reviewRail: false,
|
|
130
|
+
shellHeader: true,
|
|
129
131
|
},
|
|
130
132
|
advanced: {
|
|
131
133
|
toolbar: true,
|
|
@@ -136,6 +138,7 @@ export function resolveChromeVisibilityForPreset(input: {
|
|
|
136
138
|
pageChrome: true,
|
|
137
139
|
statusBar: true,
|
|
138
140
|
reviewRail: true,
|
|
141
|
+
shellHeader: true,
|
|
139
142
|
},
|
|
140
143
|
review: {
|
|
141
144
|
toolbar: true,
|
|
@@ -146,6 +149,7 @@ export function resolveChromeVisibilityForPreset(input: {
|
|
|
146
149
|
pageChrome: true,
|
|
147
150
|
statusBar: true,
|
|
148
151
|
reviewRail: options.showReviewRail,
|
|
152
|
+
shellHeader: true,
|
|
149
153
|
},
|
|
150
154
|
workflow: {
|
|
151
155
|
toolbar: true,
|
|
@@ -156,6 +160,7 @@ export function resolveChromeVisibilityForPreset(input: {
|
|
|
156
160
|
pageChrome: true,
|
|
157
161
|
statusBar: true,
|
|
158
162
|
reviewRail: options.showReviewRail,
|
|
163
|
+
shellHeader: true,
|
|
159
164
|
},
|
|
160
165
|
collab: {
|
|
161
166
|
toolbar: true,
|
|
@@ -166,6 +171,7 @@ export function resolveChromeVisibilityForPreset(input: {
|
|
|
166
171
|
pageChrome: true,
|
|
167
172
|
statusBar: true,
|
|
168
173
|
reviewRail: options.showReviewRail,
|
|
174
|
+
shellHeader: true,
|
|
169
175
|
},
|
|
170
176
|
};
|
|
171
177
|
|
|
package/src/api/v3/ui/viewport.ts
CHANGED
|
@@ -112,7 +112,7 @@ export const scrollToPageMetadata: ApiV3FnMetadata = {
|
|
|
112
112
|
stateClass: "C-local",
|
|
113
113
|
persistsTo: "none",
|
|
114
114
|
rwdReference:
|
|
115
|
-
"§UI API § ui.viewport.scrollToPage. Resolves pageNumber → scrollY via handle.geometry.getPage(pageIndex); dispatches through controller.dispatchScroll({ kind:'page', value, behavior }); returns the settled {actualPage, scrollY}. 1-based page numbers; clamps to [1, pageCount]. First-class API for visual-fidelity harness + 'Go to page N' UX — replaces DOM-scrape fallback (coord-10 §γ).",
|
|
115
|
+
"§UI API § ui.viewport.scrollToPage. Resolves pageNumber → scrollY via handle.geometry.getPage(pageIndex); dispatches through controller.dispatchScroll({ kind:'page', value, behavior }); returns the settled {actualPage, scrollY}. 1-based page numbers; clamps to [1, pageCount]. First-class API for visual-fidelity harness + 'Go to page N' UX — replaces DOM-scrape fallback (coord-10 §γ). Parity note: reads the same `handle.geometry.getPage(i).frame.topPx` source as `runtime.viewport.getPageAnchor` (L07 coord-07 §2.9, shipped 2026-04-24 in `src/api/v3/runtime/viewport.ts`), so `actualPage + scrollY` here and `{scrollY, pageRect}` on the runtime side stay consistent by construction. No direct delegation today because `scripts/ci-check-ui-api-layer-purity.mjs` restricts `src/api/v3/ui/**` from importing `src/api/v3/runtime/**`; both surfaces are thin wrappers over the shared geometry facet.",
|
|
116
116
|
};
|
|
117
117
|
|
|
118
118
|
// ----- X5 markup-mode metadata (state-classes cross-cutting Slice X5) -----
|
|
package/src/core/state/editor-state.ts
CHANGED
|
@@ -582,14 +582,57 @@ export function createPersistedEditorSnapshot(
|
|
|
582
582
|
}
|
|
583
583
|
|
|
584
584
|
function estimateParagraphCount(content: unknown): number {
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
585
|
+
// Canonical shape: `{type:"doc", children: BlockNode[]}`. Older
|
|
586
|
+
// shapes (array / `.blocks`) handled for persistence-snapshot
|
|
587
|
+
// fallback. KI-P4 (2026-04-23): pre-fix the array + .blocks
|
|
588
|
+
// branches never matched the current envelope, so the fallback
|
|
589
|
+
// returned 1 on any non-empty document regardless of paragraph
|
|
590
|
+
// count. Fix counts ParagraphNode entries recursively, descending
|
|
591
|
+
// into table cells + SDT / customXml blocks so nested paragraphs
|
|
592
|
+
// contribute to the total.
|
|
593
|
+
let count = 0;
|
|
594
|
+
const walk = (node: unknown): void => {
|
|
595
|
+
if (!node || typeof node !== "object") return;
|
|
596
|
+
const typed = node as { type?: unknown };
|
|
597
|
+
if (typed.type === "paragraph") {
|
|
598
|
+
count += 1;
|
|
599
|
+
return;
|
|
600
|
+
}
|
|
601
|
+
if (typed.type === "table") {
|
|
602
|
+
const rows = (node as { rows?: unknown[] }).rows;
|
|
603
|
+
if (Array.isArray(rows)) {
|
|
604
|
+
for (const row of rows) {
|
|
605
|
+
const cells = (row as { cells?: unknown[] }).cells;
|
|
606
|
+
if (Array.isArray(cells)) {
|
|
607
|
+
for (const cell of cells) {
|
|
608
|
+
const children = (cell as { children?: unknown[] }).children;
|
|
609
|
+
if (Array.isArray(children)) children.forEach(walk);
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
return;
|
|
615
|
+
}
|
|
616
|
+
const children = (node as { children?: unknown[] }).children;
|
|
617
|
+
if (Array.isArray(children)) children.forEach(walk);
|
|
618
|
+
};
|
|
588
619
|
|
|
589
|
-
if (content && typeof content === "object"
|
|
590
|
-
|
|
620
|
+
if (content && typeof content === "object") {
|
|
621
|
+
const children = (content as { children?: unknown[] }).children;
|
|
622
|
+
if (Array.isArray(children)) {
|
|
623
|
+
children.forEach(walk);
|
|
624
|
+
return count;
|
|
625
|
+
}
|
|
626
|
+
const blocks = (content as { blocks?: unknown[] }).blocks;
|
|
627
|
+
if (Array.isArray(blocks)) {
|
|
628
|
+
blocks.forEach(walk);
|
|
629
|
+
return count;
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
if (Array.isArray(content)) {
|
|
633
|
+
content.forEach(walk);
|
|
634
|
+
return count;
|
|
591
635
|
}
|
|
592
|
-
|
|
593
636
|
return extractText(content).length > 0 ? 1 : 0;
|
|
594
637
|
}
|
|
595
638
|
|
|
package/src/io/export/serialize-footnotes.ts
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
} from "./table-properties-xml.ts";
|
|
19
19
|
import { twip } from "./twip.ts";
|
|
20
20
|
import { escapeXmlAttribute } from "./escape-xml-attribute.ts";
|
|
21
|
+
import { buildFrameXml } from "./serialize-paragraph-formatting.ts";
|
|
21
22
|
|
|
22
23
|
export const WORD_FOOTNOTES_CONTENT_TYPE =
|
|
23
24
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml";
|
|
@@ -222,6 +223,11 @@ function buildParagraphPropertiesXml(paragraph: ParagraphNode): string {
|
|
|
222
223
|
if (paragraph.styleId) {
|
|
223
224
|
parts.push(`<w:pStyle w:val="${escapeXmlAttribute(paragraph.styleId)}"/>`);
|
|
224
225
|
}
|
|
226
|
+
// Coord-04 §1.19.d — direct-paragraph framePr (footnotes path).
|
|
227
|
+
{
|
|
228
|
+
const frameXml = buildFrameXml(paragraph.frameProperties);
|
|
229
|
+
if (frameXml) parts.push(frameXml);
|
|
230
|
+
}
|
|
225
231
|
if (paragraph.alignment) {
|
|
226
232
|
parts.push(`<w:jc w:val="${escapeXmlAttribute(paragraph.alignment)}"/>`);
|
|
227
233
|
}
|
|
package/src/io/export/serialize-headers-footers.ts
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
} from "./table-properties-xml.ts";
|
|
19
19
|
import { twip } from "./twip.ts";
|
|
20
20
|
import { escapeXmlAttribute } from "./escape-xml-attribute.ts";
|
|
21
|
+
import { buildFrameXml } from "./serialize-paragraph-formatting.ts";
|
|
21
22
|
|
|
22
23
|
export const WORD_HEADER_CONTENT_TYPE =
|
|
23
24
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
|
@@ -186,6 +187,11 @@ function buildParagraphPropertiesXml(paragraph: ParagraphNode): string {
|
|
|
186
187
|
if (paragraph.styleId) {
|
|
187
188
|
parts.push(`<w:pStyle w:val="${escapeXmlAttribute(paragraph.styleId)}"/>`);
|
|
188
189
|
}
|
|
190
|
+
// Coord-04 §1.19.d — direct-paragraph framePr (headers/footers path).
|
|
191
|
+
{
|
|
192
|
+
const frameXml = buildFrameXml(paragraph.frameProperties);
|
|
193
|
+
if (frameXml) parts.push(frameXml);
|
|
194
|
+
}
|
|
189
195
|
if (paragraph.spacing) {
|
|
190
196
|
const s = paragraph.spacing;
|
|
191
197
|
const attrs: string[] = [];
|
|
package/src/io/export/serialize-main-document.ts
CHANGED
|
@@ -22,6 +22,7 @@ import { SCOPE_MARKER_BOOKMARK_PREFIX } from "../ooxml/parse-scope-markers.ts";
|
|
|
22
22
|
import { getOpaqueFragment } from "../../preservation/store.ts";
|
|
23
23
|
import { retainRelationshipsForFragment } from "../../preservation/relationship-retention.ts";
|
|
24
24
|
import { serializeParagraphNumberingProperties } from "./serialize-numbering.ts";
|
|
25
|
+
import { buildFrameXml } from "./serialize-paragraph-formatting.ts";
|
|
25
26
|
import {
|
|
26
27
|
serializeTableCellPropertiesXml,
|
|
27
28
|
serializeTablePropertiesXml,
|
|
@@ -716,6 +717,12 @@ function buildParagraphPropertiesXml(paragraph: ParagraphNode): string {
|
|
|
716
717
|
pushOnOffParagraphProperty(children, "keepNext", paragraph.keepNext);
|
|
717
718
|
pushOnOffParagraphProperty(children, "keepLines", paragraph.keepLines);
|
|
718
719
|
pushOnOffParagraphProperty(children, "pageBreakBefore", paragraph.pageBreakBefore);
|
|
720
|
+
// ECMA-376 §17.3.1 canonical slot for framePr: between pageBreakBefore
|
|
721
|
+
// and pBdr. Coord-04 §1.19.d — direct-paragraph path.
|
|
722
|
+
{
|
|
723
|
+
const frameXml = buildFrameXml(paragraph.frameProperties);
|
|
724
|
+
if (frameXml) children.push(frameXml);
|
|
725
|
+
}
|
|
719
726
|
pushOnOffParagraphProperty(children, "widowControl", paragraph.widowControl);
|
|
720
727
|
if (paragraph.outlineLevel !== undefined) {
|
|
721
728
|
children.push(`<w:outlineLvl w:val="${paragraph.outlineLevel}"/>`);
|
|
package/src/io/export/serialize-paragraph-formatting.ts
CHANGED
|
@@ -93,7 +93,7 @@ function buildSpacingXml(s: ParagraphSpacing | undefined): string {
|
|
|
93
93
|
return attrs.length > 0 ? `<w:spacing ${attrs.join(" ")}/>` : "";
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
-
function buildFrameXml(f: FrameProperties | undefined): string {
|
|
96
|
+
export function buildFrameXml(f: FrameProperties | undefined): string {
|
|
97
97
|
if (!f) return "";
|
|
98
98
|
// Prefer parsed rawXml when available — preserves extension attributes
|
|
99
99
|
// (`w14:*`, `w15:*`, `mc:Ignorable`) that the typed field set doesn't
|
|
package/src/io/normalize/normalize-text.ts
CHANGED
|
@@ -264,6 +264,7 @@ function normalizeParagraph(
|
|
|
264
264
|
...(paragraph.suppressLineNumbers !== undefined
|
|
265
265
|
? { suppressLineNumbers: paragraph.suppressLineNumbers }
|
|
266
266
|
: {}),
|
|
267
|
+
...(paragraph.frameProperties ? { frameProperties: paragraph.frameProperties } : {}),
|
|
267
268
|
// A.7: preserve w14:paraId / w14:textId across import → export so
|
|
268
269
|
// downstream tools that diff documents by paragraph id stay stable.
|
|
269
270
|
...(paragraph.wordExtensionIds
|
|
@@ -715,9 +716,30 @@ function registerComplexPreviewMedia(
|
|
|
715
716
|
function normalizeHyperlink(node: ParsedHyperlinkNode): {
|
|
716
717
|
type: "hyperlink";
|
|
717
718
|
href: string;
|
|
718
|
-
children: Array<
|
|
719
|
+
children: Array<
|
|
720
|
+
| TextNode
|
|
721
|
+
| { type: "hard_break" }
|
|
722
|
+
| { type: "column_break" }
|
|
723
|
+
| { type: "page_break" }
|
|
724
|
+
| { type: "tab" }
|
|
725
|
+
| { type: "symbol"; char: string; font?: string; marks?: TextMark[] }
|
|
726
|
+
>;
|
|
719
727
|
} {
|
|
720
|
-
|
|
728
|
+
// Canonical `HyperlinkNode.children` accepts the full inline-leaf set
|
|
729
|
+
// (TextNode | HardBreakNode | ColumnBreakNode | PageBreakNode | TabNode |
|
|
730
|
+
// SymbolNode). Matching the canonical shape here keeps rare
|
|
731
|
+
// hyperlink-inside-break patterns (a link spanning a column or page
|
|
732
|
+
// break in Word's output) from silently dropping at the normalize step —
|
|
733
|
+
// same class of drop that `coord-04 §1.19.b` fixed one level up in
|
|
734
|
+
// `normalizeInlineChildren`.
|
|
735
|
+
const children: Array<
|
|
736
|
+
| TextNode
|
|
737
|
+
| { type: "hard_break" }
|
|
738
|
+
| { type: "column_break" }
|
|
739
|
+
| { type: "page_break" }
|
|
740
|
+
| { type: "tab" }
|
|
741
|
+
| { type: "symbol"; char: string; font?: string; marks?: TextMark[] }
|
|
742
|
+
> = [];
|
|
721
743
|
|
|
722
744
|
for (const child of node.children) {
|
|
723
745
|
switch (child.type) {
|
|
@@ -743,6 +765,20 @@ function normalizeHyperlink(node: ParsedHyperlinkNode): {
|
|
|
743
765
|
case "hard_break":
|
|
744
766
|
children.push({ type: "hard_break" });
|
|
745
767
|
break;
|
|
768
|
+
case "column_break":
|
|
769
|
+
children.push({ type: "column_break" });
|
|
770
|
+
break;
|
|
771
|
+
case "page_break":
|
|
772
|
+
children.push({ type: "page_break" });
|
|
773
|
+
break;
|
|
774
|
+
case "symbol":
|
|
775
|
+
children.push({
|
|
776
|
+
type: "symbol",
|
|
777
|
+
char: child.char,
|
|
778
|
+
...(child.font ? { font: child.font } : {}),
|
|
779
|
+
...(child.marks && child.marks.length > 0 ? { marks: child.marks } : {}),
|
|
780
|
+
});
|
|
781
|
+
break;
|
|
746
782
|
}
|
|
747
783
|
}
|
|
748
784
|
|
|
package/src/io/ooxml/parse-headers-footers.ts
CHANGED
|
@@ -328,6 +328,37 @@ function parseParagraphElement(
|
|
|
328
328
|
activeComplexField = null;
|
|
329
329
|
}
|
|
330
330
|
pushFieldNode(children, child, "simple");
|
|
331
|
+
} else if (name === "sdt") {
|
|
332
|
+
// coord-11 §22 — structured-document-tag wrapping run-level content
|
|
333
|
+
// inside a header/footer paragraph. Word commonly uses these to
|
|
334
|
+
// bundle the page-number field + decorative drawings (e.g. CCEP's
|
|
335
|
+
// footer "Copyright CCEP STRICTLY CONFIDENTIAL" red rectangle +
|
|
336
|
+
// "Page N" label both sit inside one `<w:sdt>` in footer1.xml).
|
|
337
|
+
// Without this case the sdt was silently dropped at the paragraph
|
|
338
|
+
// walker and every run it carried — including WPS shapes bearing
|
|
339
|
+
// the brand-strip text — never reached the canonical tree.
|
|
340
|
+
// Treat `<w:sdtContent>` as a transparent wrapper and re-process
|
|
341
|
+
// its `<w:r>` / `<w:hyperlink>` / `<w:sdt>` children as if they
|
|
342
|
+
// were direct paragraph children.
|
|
343
|
+
const sdtContent = findChildElementOptional(child, "sdtContent");
|
|
344
|
+
if (sdtContent) {
|
|
345
|
+
for (const grandchild of sdtContent.children) {
|
|
346
|
+
if (grandchild.type !== "element") continue;
|
|
347
|
+
const gname = localName(grandchild.name);
|
|
348
|
+
if (gname === "r") {
|
|
349
|
+
activeComplexField = appendRunNodes(grandchild, children, activeComplexField, sourceXml, opts);
|
|
350
|
+
} else if (gname === "hyperlink") {
|
|
351
|
+
children.push(parseHyperlinkElement(grandchild, opts));
|
|
352
|
+
} else if (gname === "bookmarkStart" || gname === "bookmarkEnd") {
|
|
353
|
+
children.push(parseBookmarkElement(grandchild));
|
|
354
|
+
} else if (gname === "fldSimple") {
|
|
355
|
+
pushFieldNode(children, grandchild, "simple");
|
|
356
|
+
}
|
|
357
|
+
// Nested sdt / other elements ignored — deeper nesting is rare
|
|
358
|
+
// enough that opaque round-trip via the block-level sdt parser
|
|
359
|
+
// handles it if it matters.
|
|
360
|
+
}
|
|
361
|
+
}
|
|
331
362
|
}
|
|
332
363
|
}
|
|
333
364
|
|
|
package/src/io/ooxml/parse-main-document.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
BorderSpec,
|
|
3
3
|
CellShading,
|
|
4
|
+
FrameProperties,
|
|
4
5
|
TextMark,
|
|
5
6
|
ParagraphBorders,
|
|
6
7
|
ParagraphShading,
|
|
@@ -39,6 +40,7 @@ import { parseComplexContentXml, type ChartPartLookup } from "./parse-complex-co
|
|
|
39
40
|
import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
|
|
40
41
|
import { parseObject } from "./parse-object.ts";
|
|
41
42
|
import { parseDrawingFrame } from "./parse-drawing.ts";
|
|
43
|
+
import { readFrameProperties } from "./parse-paragraph-formatting.ts";
|
|
42
44
|
import { classifyFieldInstruction } from "./parse-fields.ts";
|
|
43
45
|
import { parseFFDataFromFldChar } from "./parse-ffdata.ts";
|
|
44
46
|
import { resolveHighlightColor } from "./highlight-colors.ts";
|
|
@@ -217,6 +219,41 @@ function captureGrabBagFromContainer(
|
|
|
217
219
|
export interface ParsedMainDocument {
|
|
218
220
|
blocks: ParsedBlockNode[];
|
|
219
221
|
finalSectionProperties?: SectionProperties;
|
|
222
|
+
/**
|
|
223
|
+
* Aggregate count of cosmetic markers stripped during parse (see
|
|
224
|
+
* {@link ParseMainDocumentOptions.stripCosmeticMarkers}). Keyed by
|
|
225
|
+
* local element name (e.g. `lastRenderedPageBreak`). Absent when no
|
|
226
|
+
* markers were stripped.
|
|
227
|
+
*/
|
|
228
|
+
skippedCosmeticMarkerCounts?: Readonly<Record<string, number>>;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* Cosmetic markers that Word re-inserts on reopen and that carry no
|
|
233
|
+
* contract semantics. Stripping them at parse time unblocks
|
|
234
|
+
* `replaceText` on ranges that today cross them as `opaque_inline`
|
|
235
|
+
* boundaries. See `docs/architecture/cosmetic-marker-strip.md`.
|
|
236
|
+
*
|
|
237
|
+
* This is the Phase 1 set. Bookmark-pair stripping (with reference
|
|
238
|
+
* scan) is Phase 2.
|
|
239
|
+
*/
|
|
240
|
+
export const COSMETIC_MARKER_ELEMENT_NAMES: ReadonlySet<string> = new Set([
|
|
241
|
+
"lastRenderedPageBreak",
|
|
242
|
+
"proofErr",
|
|
243
|
+
"noBreakHyphen",
|
|
244
|
+
]);
|
|
245
|
+
|
|
246
|
+
export interface ParseMainDocumentOptions {
|
|
247
|
+
/**
|
|
248
|
+
* When `true` (the default), drops `<w:lastRenderedPageBreak/>`,
|
|
249
|
+
* `<w:proofErr/>`, and `<w:noBreakHyphen/>` during the parse walk
|
|
250
|
+
* instead of emitting them as `opaque_inline` nodes. Counts are
|
|
251
|
+
* reported on {@link ParsedMainDocument.skippedCosmeticMarkerCounts}.
|
|
252
|
+
*
|
|
253
|
+
* Set to `false` to preserve the pre-strip behavior exactly — every
|
|
254
|
+
* cosmetic marker becomes an `opaque_inline` with its source XML.
|
|
255
|
+
*/
|
|
256
|
+
stripCosmeticMarkers?: boolean;
|
|
220
257
|
}
|
|
221
258
|
|
|
222
259
|
export type ParsedBlockNode =
|
|
@@ -256,6 +293,15 @@ export interface ParsedParagraphNode {
|
|
|
256
293
|
bidi?: boolean;
|
|
257
294
|
suppressLineNumbers?: boolean;
|
|
258
295
|
cnfStyle?: string;
|
|
296
|
+
/**
|
|
297
|
+
* `<w:framePr>` declared directly on the paragraph's own `<w:pPr>`.
|
|
298
|
+
* Coord-04 §1.19.d step 2 (inline path). The style-cascade path
|
|
299
|
+
* flows through `CanonicalParagraphFormatting.frameProperties` on
|
|
300
|
+
* the style side; this slot captures the direct-override path so
|
|
301
|
+
* L02 `ParagraphNode.frameProperties` (added 2026-04-24 `4b3ea0b2`)
|
|
302
|
+
* can reach its canonical shape.
|
|
303
|
+
*/
|
|
304
|
+
frameProperties?: FrameProperties;
|
|
259
305
|
/** A.7: preserved w14 extension ids (paraId/textId). */
|
|
260
306
|
wordExtensionIds?: {
|
|
261
307
|
paraId?: string;
|
|
@@ -656,24 +702,61 @@ export function setActiveParseTelemetryBus(bus: ParseTelemetryBus | undefined):
|
|
|
656
702
|
activeParseTelemetryBus = bus;
|
|
657
703
|
}
|
|
658
704
|
|
|
705
|
+
/**
|
|
706
|
+
* Request-scoped cosmetic-marker strip context. Set by
|
|
707
|
+
* `parseMainDocumentXml` for the duration of a single parse; read at
|
|
708
|
+
* the four emission sites in `parseBodyChild` / `parseRun` /
|
|
709
|
+
* `parseRunContentOnly` / `parseRevisionContainer`. Using a module
|
|
710
|
+
* variable instead of threading the flag through ~15 intermediate
|
|
711
|
+
* function signatures keeps the call sites readable; the try/finally
|
|
712
|
+
* in the entry point ensures the variable never leaks across calls.
|
|
713
|
+
*
|
|
714
|
+
* Re-entrancy invariant matches `activeChartPartLookup` above.
|
|
715
|
+
*/
|
|
716
|
+
interface CosmeticStripContext {
|
|
717
|
+
readonly strip: boolean;
|
|
718
|
+
readonly counts: Record<string, number>;
|
|
719
|
+
}
|
|
720
|
+
let activeCosmeticStripContext: CosmeticStripContext | null = null;
|
|
721
|
+
|
|
722
|
+
function noteStrippedCosmeticMarker(tag: string): void {
|
|
723
|
+
if (!activeCosmeticStripContext) return;
|
|
724
|
+
activeCosmeticStripContext.counts[tag] =
|
|
725
|
+
(activeCosmeticStripContext.counts[tag] ?? 0) + 1;
|
|
726
|
+
}
|
|
727
|
+
|
|
728
|
+
function shouldStripCosmeticMarker(): boolean {
|
|
729
|
+
return activeCosmeticStripContext?.strip === true;
|
|
730
|
+
}
|
|
731
|
+
|
|
659
732
|
export function parseMainDocumentXml(
|
|
660
733
|
xml: string,
|
|
661
734
|
relationships: readonly OpcRelationship[] = [],
|
|
662
735
|
mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
|
|
663
736
|
sourcePartPath = "/word/document.xml",
|
|
664
737
|
chartPartLookup?: ChartPartLookup,
|
|
738
|
+
parseOptions: ParseMainDocumentOptions = {},
|
|
665
739
|
): ParsedMainDocument {
|
|
666
740
|
activeChartPartLookup = chartPartLookup;
|
|
741
|
+
const stripContext: CosmeticStripContext = {
|
|
742
|
+
strip: parseOptions.stripCosmeticMarkers !== false,
|
|
743
|
+
counts: Object.create(null) as Record<string, number>,
|
|
744
|
+
};
|
|
745
|
+
activeCosmeticStripContext = stripContext;
|
|
667
746
|
const bus = activeParseTelemetryBus;
|
|
668
747
|
const started = bus?.isEnabled("parse") ? performanceNow() : 0;
|
|
669
748
|
try {
|
|
670
749
|
const result = parseMainDocumentXmlInner(xml, relationships, mediaParts, sourcePartPath);
|
|
750
|
+
if (Object.keys(stripContext.counts).length > 0) {
|
|
751
|
+
result.skippedCosmeticMarkerCounts = Object.freeze({ ...stripContext.counts });
|
|
752
|
+
}
|
|
671
753
|
if (bus?.isEnabled("parse")) {
|
|
672
754
|
emitParseSummary(bus, result, sourcePartPath, performanceNow() - started);
|
|
673
755
|
}
|
|
674
756
|
return result;
|
|
675
757
|
} finally {
|
|
676
758
|
activeChartPartLookup = undefined;
|
|
759
|
+
activeCosmeticStripContext = null;
|
|
677
760
|
}
|
|
678
761
|
}
|
|
679
762
|
|
|
@@ -704,6 +787,13 @@ function emitParseSummary(
|
|
|
704
787
|
blockCount: result.blocks.length,
|
|
705
788
|
blockKindCounts: counts,
|
|
706
789
|
ms,
|
|
790
|
+
// Strip counts are surfaced here (telemetry-only) rather than as a
|
|
791
|
+
// warning on `diagnostics.warnings` — the markers carry no
|
|
792
|
+
// contract semantics and surfacing them in the user-visible
|
|
793
|
+
// warnings feed would be noise. Available to debug UX / tests via
|
|
794
|
+
// the `parse` channel; absent when the feature is disabled or no
|
|
795
|
+
// markers were stripped.
|
|
796
|
+
skippedCosmeticMarkerCounts: result.skippedCosmeticMarkerCounts,
|
|
707
797
|
},
|
|
708
798
|
});
|
|
709
799
|
}
|
|
@@ -1004,6 +1094,7 @@ function parseBodyChild(
|
|
|
1004
1094
|
let bidi: ParsedParagraphNode["bidi"];
|
|
1005
1095
|
let suppressLineNumbers: ParsedParagraphNode["suppressLineNumbers"];
|
|
1006
1096
|
let cnfStyle: ParsedParagraphNode["cnfStyle"];
|
|
1097
|
+
let frameProperties: ParsedParagraphNode["frameProperties"];
|
|
1007
1098
|
let sectionProperties: SectionProperties | undefined;
|
|
1008
1099
|
let sectionPropertiesXml: string | undefined;
|
|
1009
1100
|
let paragraphSupported = true;
|
|
@@ -1050,6 +1141,12 @@ function parseBodyChild(
|
|
|
1050
1141
|
bidi = readOnOffParagraphProperty(child, "bidi");
|
|
1051
1142
|
suppressLineNumbers = readOnOffParagraphProperty(child, "suppressLineNumbers");
|
|
1052
1143
|
cnfStyle = readParagraphCnfStyle(child);
|
|
1144
|
+
{
|
|
1145
|
+
const framePrNode = child.children.find(
|
|
1146
|
+
(c): c is XmlElementNode => c.type === "element" && localName(c.name) === "framePr",
|
|
1147
|
+
);
|
|
1148
|
+
if (framePrNode) frameProperties = readFrameProperties(framePrNode);
|
|
1149
|
+
}
|
|
1053
1150
|
sectionProperties = readSectionPropertiesFromPPr(child);
|
|
1054
1151
|
sectionPropertiesXml = readSectionPropertiesXmlFromPPr(child, sourceXml);
|
|
1055
1152
|
paragraphSupported = paragraphSupported && supportsParagraphProperties(child);
|
|
@@ -1148,6 +1245,10 @@ function parseBodyChild(
|
|
|
1148
1245
|
flushActiveComplexField(children, () => {
|
|
1149
1246
|
activeComplexField = null;
|
|
1150
1247
|
}, activeComplexField);
|
|
1248
|
+
if (shouldStripCosmeticMarker()) {
|
|
1249
|
+
noteStrippedCosmeticMarker("proofErr");
|
|
1250
|
+
break;
|
|
1251
|
+
}
|
|
1151
1252
|
children.push({
|
|
1152
1253
|
type: "opaque_inline",
|
|
1153
1254
|
rawXml: sourceXml.slice(child.start, child.end),
|
|
@@ -1235,6 +1336,7 @@ function parseBodyChild(
|
|
|
1235
1336
|
...(bidi !== undefined ? { bidi } : {}),
|
|
1236
1337
|
...(suppressLineNumbers !== undefined ? { suppressLineNumbers } : {}),
|
|
1237
1338
|
...(cnfStyle ? { cnfStyle } : {}),
|
|
1339
|
+
...(frameProperties ? { frameProperties } : {}),
|
|
1238
1340
|
...(wordExtensionIds ? { wordExtensionIds } : {}),
|
|
1239
1341
|
...(sectionProperties ? { sectionProperties } : {}),
|
|
1240
1342
|
...(sectionPropertiesXml ? { sectionPropertiesXml } : {}),
|
|
@@ -2584,6 +2686,11 @@ function parseRun(
|
|
|
2584
2686
|
}
|
|
2585
2687
|
case "lastRenderedPageBreak":
|
|
2586
2688
|
case "proofErr":
|
|
2689
|
+
case "noBreakHyphen":
|
|
2690
|
+
if (shouldStripCosmeticMarker()) {
|
|
2691
|
+
noteStrippedCosmeticMarker(localName(child.name));
|
|
2692
|
+
break;
|
|
2693
|
+
}
|
|
2587
2694
|
result.push({
|
|
2588
2695
|
type: "opaque_inline",
|
|
2589
2696
|
rawXml: sourceXml.slice(child.start, child.end),
|
|
@@ -2657,12 +2764,23 @@ function parseRevisionContainer(
|
|
|
2657
2764
|
result.push(hyperlink);
|
|
2658
2765
|
break;
|
|
2659
2766
|
}
|
|
2767
|
+
case "proofErr":
|
|
2768
|
+
case "lastRenderedPageBreak":
|
|
2769
|
+
case "noBreakHyphen":
|
|
2770
|
+
if (shouldStripCosmeticMarker()) {
|
|
2771
|
+
noteStrippedCosmeticMarker(localName(child.name));
|
|
2772
|
+
break;
|
|
2773
|
+
}
|
|
2774
|
+
return [
|
|
2775
|
+
{
|
|
2776
|
+
type: "opaque_inline",
|
|
2777
|
+
rawXml: sourceXml.slice(node.start, node.end),
|
|
2778
|
+
},
|
|
2779
|
+
];
|
|
2660
2780
|
case "commentRangeStart":
|
|
2661
2781
|
case "commentRangeEnd":
|
|
2662
2782
|
case "bookmarkStart":
|
|
2663
2783
|
case "bookmarkEnd":
|
|
2664
|
-
case "proofErr":
|
|
2665
|
-
case "lastRenderedPageBreak":
|
|
2666
2784
|
return [
|
|
2667
2785
|
{
|
|
2668
2786
|
type: "opaque_inline",
|
|
@@ -2835,10 +2953,17 @@ function parseRunContentOnly(
|
|
|
2835
2953
|
case "commentReference":
|
|
2836
2954
|
case "lastRenderedPageBreak":
|
|
2837
2955
|
case "proofErr":
|
|
2956
|
+
case "noBreakHyphen": {
|
|
2957
|
+
const tag = localName(child.name);
|
|
2958
|
+
if (shouldStripCosmeticMarker() && tag !== "commentReference") {
|
|
2959
|
+
noteStrippedCosmeticMarker(tag);
|
|
2960
|
+
break;
|
|
2961
|
+
}
|
|
2838
2962
|
if (options.preserveUnsupportedReviewMarkup) {
|
|
2839
2963
|
return { nodes: [], supported: false };
|
|
2840
2964
|
}
|
|
2841
2965
|
break;
|
|
2966
|
+
}
|
|
2842
2967
|
default:
|
|
2843
2968
|
return { nodes: [], supported: false };
|
|
2844
2969
|
}
|
|
package/src/io/ooxml/parse-paragraph-formatting.ts
CHANGED
|
@@ -204,7 +204,7 @@ function readShading(node: XmlElementNode): ParagraphShading | undefined {
|
|
|
204
204
|
* The typed attributes cover the CCEP cases we've seen (2-column inset
|
|
205
205
|
* text frames, drop-caps); extension attrs are rare in that corpus.
|
|
206
206
|
*/
|
|
207
|
-
function readFrameProperties(node: XmlElementNode): FrameProperties | undefined {
|
|
207
|
+
export function readFrameProperties(node: XmlElementNode): FrameProperties | undefined {
|
|
208
208
|
const out: FrameProperties = {};
|
|
209
209
|
const width = readIntAttr(node, "w:w");
|
|
210
210
|
if (width !== undefined) out.widthTwips = width;
|